diff options
-rw-r--r-- | NEWS.txt | 3 | ||||
-rw-r--r-- | bs4/diagnose.py | 20 |
2 files changed, 16 insertions, 7 deletions
@@ -5,6 +5,9 @@ * Fixed code that was causing deprecation warnings in recent Python 3 versions. Includes a patch from Ville Skyttä. [bug=1778909] [bug=1689496] +* Fixed a Windows crash in diagnose() when checking whether a long + markup string is a filename. [bug=1737121] + = 4.6.0 (20170507) = * Added the `Tag.get_attribute_list` method, which acts like `Tag.get` for diff --git a/bs4/diagnose.py b/bs4/diagnose.py index 8768332..7a28c09 100644 --- a/bs4/diagnose.py +++ b/bs4/diagnose.py @@ -37,7 +37,7 @@ def diagnose(data): name) if 'lxml' in basic_parsers: - basic_parsers.append(["lxml", "xml"]) + basic_parsers.append("lxml-xml") try: from lxml import etree print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)) @@ -56,21 +56,27 @@ def diagnose(data): if hasattr(data, 'read'): data = data.read() - elif os.path.exists(data): - print '"%s" looks like a filename. Reading data from the file.' % data - with open(data) as fp: - data = fp.read() elif data.startswith("http:") or data.startswith("https:"): print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup." return - print + else: + try: + if os.path.exists(data): + print '"%s" looks like a filename. Reading data from the file.' % data + with open(data) as fp: + data = fp.read() + except ValueError: + # This can happen on some platforms when the 'filename' is + # too long. Assume it's data and not a filename. + pass + print for parser in basic_parsers: print "Trying to parse your markup with %s" % parser success = False try: - soup = BeautifulSoup(data, parser) + soup = BeautifulSoup(data, features=parser) success = True except Exception, e: print "%s could not parse the markup." % parser |