diff options
author | Leonard Richardson <leonardr@segfault.org> | 2018-07-14 14:24:36 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2018-07-14 14:24:36 -0400 |
commit | d48fd72468023868ade770abe8ea824bff7df4cc (patch) | |
tree | 3085b2addf0ab3b87ae5a3b2e050cfa29e620a4b | |
parent | 73b0fdbccb599c5bb77d7727af74c0d73a72e41d (diff) | |
download | beautifulsoup4-d48fd72468023868ade770abe8ea824bff7df4cc.tar.gz |
Fixed a Windows crash in diagnose() when checking whether a long
markup string is a filename. [bug=1737121]
-rw-r--r-- | NEWS.txt | 3 | ||||
-rw-r--r-- | bs4/diagnose.py | 20 |
2 files changed, 16 insertions, 7 deletions
```diff
@@ -5,6 +5,9 @@
 * Fixed code that was causing deprecation warnings in recent Python 3
   versions. Includes a patch from Ville Skyttä. [bug=1778909] [bug=1689496]
 
+* Fixed a Windows crash in diagnose() when checking whether a long
+  markup string is a filename. [bug=1737121]
+
 = 4.6.0 (20170507) =
 
 * Added the `Tag.get_attribute_list` method, which acts like `Tag.get` for
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index 8768332..7a28c09 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -37,7 +37,7 @@ def diagnose(data):
             name)
 
     if 'lxml' in basic_parsers:
-        basic_parsers.append(["lxml", "xml"])
+        basic_parsers.append("lxml-xml")
         try:
             from lxml import etree
             print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
@@ -56,21 +56,27 @@ def diagnose(data):
 
     if hasattr(data, 'read'):
         data = data.read()
-    elif os.path.exists(data):
-        print '"%s" looks like a filename. Reading data from the file.' % data
-        with open(data) as fp:
-            data = fp.read()
     elif data.startswith("http:") or data.startswith("https:"):
         print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
         print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
         return
-    print
+    else:
+        try:
+            if os.path.exists(data):
+                print '"%s" looks like a filename. Reading data from the file.' % data
+                with open(data) as fp:
+                    data = fp.read()
+        except ValueError:
+            # This can happen on some platforms when the 'filename' is
+            # too long. Assume it's data and not a filename.
+            pass
+        print
 
     for parser in basic_parsers:
         print "Trying to parse your markup with %s" % parser
         success = False
         try:
-            soup = BeautifulSoup(data, parser)
+            soup = BeautifulSoup(data, features=parser)
             success = True
         except Exception, e:
             print "%s could not parse the markup." % parser
```