From c4cb9ff2a794e7606e240e4da73dcc837ec175df Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Sun, 12 Aug 2018 10:58:24 -0400 Subject: Converted README to Markdown format. --- NEWS.txt | 2 +- README.md | 75 +++++++++++++++++++++++++++++++++++++++++++++++++ README.txt | 63 ----------------------------------------- bs4/builder/__init__.py | 6 ++++ setup.py | 6 +++- 5 files changed, 87 insertions(+), 65 deletions(-) create mode 100644 README.md delete mode 100644 README.txt diff --git a/NEWS.txt b/NEWS.txt index a18b623..d1ae91c 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -1,4 +1,4 @@ -= 4.6.2 (unreleased) += 4.6.2 (20180812) * Fix an exception when a custom formatter was asked to format a void element. [bug=1784408] diff --git a/README.md b/README.md new file mode 100644 index 0000000..91689d4 --- /dev/null +++ b/README.md @@ -0,0 +1,75 @@ +Beautiful Soup is a library that makes it easy to scrape information +from web pages. It sits atop an HTML or XML parser, providing Pythonic +idioms for iterating, searching, and modifying the parse tree. + +# Quick start + +``` + >>> from bs4 import BeautifulSoup + >>> soup = BeautifulSoup("

<p>Some<b>bad<i>HTML") + >>> print soup.prettify() + <html> + <body> + <p>

+ Some + <b> + bad + <i> + HTML + </i> + </b> + </p>

+ </body> + </html> + >>> soup.find(text="bad") + u'bad' + + >>> soup.i + <i>HTML</i> + + >>> soup = BeautifulSoup("<tag1>Some<tag2/>bad<tag3>XML", "xml") + >>> print soup.prettify() + <?xml version="1.0" encoding="utf-8"?> + <tag1> + Some + <tag2/> + bad + <tag3> + XML + </tag3> + </tag1> +``` + +To go beyond the basics, [comprehensive documentation is available](http://www.crummy.com/software/BeautifulSoup/bs4/doc/). + +# Links + +* [Homepage](http://www.crummy.com/software/BeautifulSoup/bs4/) +* [Documentation](http://www.crummy.com/software/BeautifulSoup/bs4/doc/) +* [Discussion group](http://groups.google.com/group/beautifulsoup/) +* [Development](https://code.launchpad.net/beautifulsoup/) +* [Bug tracker](https://bugs.launchpad.net/beautifulsoup/) +* [Complete changelog](https://bazaar.launchpad.net/~leonardr/beautifulsoup/bs4/view/head:/NEWS.txt) + +# Building the documentation + +The bs4/doc/ directory contains full documentation in Sphinx +format. Run `make html` in that directory to create HTML +documentation. + +# Running the unit tests + +Beautiful Soup supports unit test discovery from the project root directory: + +``` + $ nosetests +``` + +``` + $ python -m unittest discover -s bs4 # Python 2.7 and up +``` + +If you checked out the source tree, you should see a script in the +home directory called test-all-versions. This script will run the unit +tests under Python 2.7, then create a temporary Python 3 conversion of +the source and run the unit tests again under Python 3. diff --git a/README.txt b/README.txt deleted file mode 100644 index 305c51e..0000000 --- a/README.txt +++ /dev/null @@ -1,63 +0,0 @@ -= Introduction = - - >>> from bs4 import BeautifulSoup - >>> soup = BeautifulSoup("
<p>Some<b>bad<i>HTML") - >>> print soup.prettify() - <html> - <body> - <p>
- Some - <b> - bad - <i> - HTML - </i> - </b> - </p>
- </body> - </html> - >>> soup.find(text="bad") - u'bad' - - >>> soup.i - <i>HTML</i> - - >>> soup = BeautifulSoup("<tag1>Some<tag2/>bad<tag3>XML", "xml") - >>> print soup.prettify() - <?xml version="1.0" encoding="utf-8"?> - <tag1> - Some - <tag2/> - bad - <tag3> - XML - </tag3> - </tag1> - -= Full documentation = - -The bs4/doc/ directory contains full documentation in Sphinx -format. Run "make html" in that directory to create HTML -documentation. - -= Running the unit tests = - -Beautiful Soup supports unit test discovery from the project root directory: - - $ nosetests - - $ python -m unittest discover -s bs4 # Python 2.7 and up - -If you checked out the source tree, you should see a script in the -home directory called test-all-versions. This script will run the unit -tests under Python 2.7, then create a temporary Python 3 conversion of -the source and run the unit tests again under Python 3. - -= Links = - -Homepage: http://www.crummy.com/software/BeautifulSoup/bs4/ -Documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/ - http://readthedocs.org/docs/beautiful-soup-4/ -Discussion group: http://groups.google.com/group/beautifulsoup/ -Development: https://code.launchpad.net/beautifulsoup/ -Bug tracker: https://bugs.launchpad.net/beautifulsoup/ diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py index 21454e6..c9e3f3d 100644 --- a/bs4/builder/__init__.py +++ b/bs4/builder/__init__.py @@ -239,6 +239,12 @@ class HTMLTreeBuilder(TreeBuilder): # These are from earlier versions of HTML and are removed in HTML5. 'basefont', 'bgsound', 'command', 'frame', 'image', 'isindex', 'nextid', 'spacer' ]) + + # The HTML standard defines these as block-level elements. Beautiful + # Soup does not treat these elements differently from other elements, + # but it may do so eventually, and this information is available if + # you need to use it. 
+ block_elements = set(["address", "article", "aside", "blockquote", "canvas", "dd", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]) # The HTML standard defines these attributes as containing a # space-separated list of values, not a single value. That is, diff --git a/setup.py b/setup.py index 93e8ce3..b2a7ddf 100644 --- a/setup.py +++ b/setup.py @@ -3,6 +3,9 @@ from setuptools import ( find_packages, ) +with open("README.md", "r") as fh: + long_description = fh.read() + setup( name="beautifulsoup4", version = "4.6.1", @@ -11,7 +14,8 @@ setup( url="http://www.crummy.com/software/BeautifulSoup/bs4/", download_url = "http://www.crummy.com/software/BeautifulSoup/bs4/download/", description="Screen-scraping library", - long_description="""Beautiful Soup sits atop an HTML or XML parser, providing Pythonic idioms for iterating, searching, and modifying the parse tree.""", + long_description=long_description, + long_description_content_type="text/markdown", license="MIT", packages=find_packages(exclude=['tests*']), extras_require = { -- cgit v1.2.1