author     Leonard Richardson <leonardr@segfault.org>  2018-07-14 14:24:36 -0400
committer  Leonard Richardson <leonardr@segfault.org>  2018-07-14 14:24:36 -0400
commit     d48fd72468023868ade770abe8ea824bff7df4cc (patch)
tree       3085b2addf0ab3b87ae5a3b2e050cfa29e620a4b
parent     73b0fdbccb599c5bb77d7727af74c0d73a72e41d (diff)
download   beautifulsoup4-d48fd72468023868ade770abe8ea824bff7df4cc.tar.gz
Fixed a Windows crash in diagnose() when checking whether a long
markup string is a filename. [bug=1737121]
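
The crash comes from os.path.exists(): on some platforms, notably Windows, it can raise ValueError rather than returning False when the candidate string is far too long to be a path. A minimal sketch of the guard pattern this commit adds (the helper name looks_like_filename is illustrative only, not part of bs4):

    import os

    def looks_like_filename(candidate):
        # On some platforms (notably Windows), os.path.exists() can raise
        # ValueError when the string is much too long to be a path.
        # Treat that case as "this is markup, not a filename".
        try:
            return os.path.exists(candidate)
        except ValueError:
            return False
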
-rw-r--r--  NEWS.txt          3
-rw-r--r--  bs4/diagnose.py  20
2 files changed, 16 insertions, 7 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 2fc0a6e..2437e83 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -5,6 +5,9 @@
* Fixed code that was causing deprecation warnings in recent Python 3
versions. Includes a patch from Ville Skyttä. [bug=1778909] [bug=1689496]
+* Fixed a Windows crash in diagnose() when checking whether a long
+ markup string is a filename. [bug=1737121]
+
= 4.6.0 (20170507) =
* Added the `Tag.get_attribute_list` method, which acts like `Tag.get` for
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index 8768332..7a28c09 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -37,7 +37,7 @@ def diagnose(data):
name)
if 'lxml' in basic_parsers:
- basic_parsers.append(["lxml", "xml"])
+ basic_parsers.append("lxml-xml")
try:
from lxml import etree
print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
@@ -56,21 +56,27 @@ def diagnose(data):
if hasattr(data, 'read'):
data = data.read()
- elif os.path.exists(data):
- print '"%s" looks like a filename. Reading data from the file.' % data
- with open(data) as fp:
- data = fp.read()
elif data.startswith("http:") or data.startswith("https:"):
print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
return
- print
+ else:
+ try:
+ if os.path.exists(data):
+ print '"%s" looks like a filename. Reading data from the file.' % data
+ with open(data) as fp:
+ data = fp.read()
+ except ValueError:
+ # This can happen on some platforms when the 'filename' is
+ # too long. Assume it's data and not a filename.
+ pass
+ print
for parser in basic_parsers:
print "Trying to parse your markup with %s" % parser
success = False
try:
- soup = BeautifulSoup(data, parser)
+ soup = BeautifulSoup(data, features=parser)
success = True
except Exception, e:
print "%s could not parse the markup." % parser
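
With this patch applied, diagnose() can be fed a long markup string directly without the filename check blowing up. A usage sketch (assuming bs4 is installed; note that the code at this commit still targets Python 2):

    from bs4.diagnose import diagnose

    # A long string that is clearly markup, not a filename. Before this
    # patch, the os.path.exists() check on such a string could raise
    # ValueError on Windows instead of falling through to parsing.
    markup = "<html><body>" + "<p>hello</p>" * 10000 + "</body></html>"
    diagnose(markup)
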