Fixed a Windows crash in diagnose() when checking whether a long
markup string is a filename. [bug=1737121]
author: Leonard Richardson <leonardr@segfault.org> 2018-07-14 14:24:36 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2018-07-14 14:24:36 -0400
commit: d48fd72468023868ade770abe8ea824bff7df4cc (patch)
tree: 3085b2addf0ab3b87ae5a3b2e050cfa29e620a4b
parent: 73b0fdbccb599c5bb77d7727af74c0d73a72e41d (diff)
download: beautifulsoup4-d48fd72468023868ade770abe8ea824bff7df4cc.tar.gz
2 files changed, 16 insertions, 7 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 2fc0a6e..2437e83 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -5,6 +5,9 @@
 * Fixed code that was causing deprecation warnings in recent Python 3
   versions. Includes a patch from Ville Skyttä. [bug=1778909] [bug=1689496]
 
+* Fixed a Windows crash in diagnose() when checking whether a long
+  markup string is a filename. [bug=1737121]
+
 = 4.6.0 (20170507) =
 
 * Added the `Tag.get_attribute_list` method, which acts like `Tag.get` for
diff --git a/bs4/diagnose.py b/bs4/diagnose.py
index 8768332..7a28c09 100644
--- a/bs4/diagnose.py
+++ b/bs4/diagnose.py
@@ -37,7 +37,7 @@ def diagnose(data):
                 name)
 
     if 'lxml' in basic_parsers:
-        basic_parsers.append(["lxml", "xml"])
+        basic_parsers.append("lxml-xml")
         try:
             from lxml import etree
             print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
@@ -56,21 +56,27 @@ def diagnose(data):
 
     if hasattr(data, 'read'):
         data = data.read()
-    elif os.path.exists(data):
-        print '"%s" looks like a filename. Reading data from the file.' % data
-        with open(data) as fp:
-            data = fp.read()
     elif data.startswith("http:") or data.startswith("https:"):
         print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
         print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
         return
-    print
+    else:
+        try:
+            if os.path.exists(data):
+                print '"%s" looks like a filename. Reading data from the file.' % data
+                with open(data) as fp:
+                    data = fp.read()
+        except ValueError:
+            # This can happen on some platforms when the 'filename' is
+            # too long. Assume it's data and not a filename.
+            pass
+        print
 
     for parser in basic_parsers:
         print "Trying to parse your markup with %s" % parser
         success = False
         try:
-            soup = BeautifulSoup(data, parser)
+            soup = BeautifulSoup(data, features=parser)
             success = True
         except Exception, e:
             print "%s could not parse the markup." % parser
author	Leonard Richardson <leonardr@segfault.org>	2018-07-14 14:24:36 -0400
committer	Leonard Richardson <leonardr@segfault.org>	2018-07-14 14:24:36 -0400
commit	d48fd72468023868ade770abe8ea824bff7df4cc (patch)
tree	3085b2addf0ab3b87ae5a3b2e050cfa29e620a4b
parent	73b0fdbccb599c5bb77d7727af74c0d73a72e41d (diff)
download	beautifulsoup4-d48fd72468023868ade770abe8ea824bff7df4cc.tar.gz