Refs #108 - Test and improve format autodetection

Autodetection was added for the ods (OpenDocument spreadsheet) format.
author: Claude Paroz <claude@2xlibre.net> 2019-10-04 23:23:43 +0200
committer: Claude Paroz <claude@2xlibre.net> 2019-10-04 23:40:24 +0200
commit: ca8dbcf9bea46607d22fe205498b4c071c705860 (patch)
tree: dd685c19557a42b7655898b2bcf11603f653dbea
parent: 44185350306c1cbc9a85b5682b4cb5d69a663494 (diff)
download: tablib-ca8dbcf9bea46607d22fe205498b4c071c705860.tar.gz
7 files changed, 40 insertions, 16 deletions
diff --git a/HISTORY.md b/HISTORY.md
index 2d92422..0450831 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -7,6 +7,7 @@
 - Fixed a regression for xlsx exports where non-string values were forced to
   strings (#314).
 - Fixed xlsx format detection (which was often detected as `xls` format).
+- Improved format autodetection and added autodetection for the `ods` format.
 - Added search to all documentation pages
 - Open xlsx workbooks in read-only mode (#316)
 - Unpin requirements
diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py
index 9e8515a..5c03d6f 100644
--- a/tablib/formats/_csv.py
+++ b/tablib/formats/_csv.py
@@ -55,5 +55,5 @@ def detect(stream, delimiter=DEFAULT_DELIMITER):
     try:
         csv.Sniffer().sniff(stream, delimiters=delimiter)
         return True
-    except (csv.Error, TypeError):
+    except Exception:
         return False
diff --git a/tablib/formats/_dbf.py b/tablib/formats/_dbf.py
index 710797d..0d1c87b 100644
--- a/tablib/formats/_dbf.py
+++ b/tablib/formats/_dbf.py
@@ -83,9 +83,5 @@ def detect(stream):
         else:
             _dbf = dbf.Dbf(StringIO(stream), readOnly=True)
         return True
-    except (ValueError, struct.error):
-        # When we try to open up a file that's not a DBF, dbfpy raises a
-        # ValueError.
-        # When unpacking a string argument with less than 8 chars, struct.error is
-        # raised.
+    except Exception:
         return False
diff --git a/tablib/formats/_ods.py b/tablib/formats/_ods.py
index 5b900b5..dbf57c4 100644
--- a/tablib/formats/_ods.py
+++ b/tablib/formats/_ods.py
@@ -91,3 +91,14 @@ def dset_sheet(dataset, ws):
                     cell = table.TableCell()
                     cell.addElement(text.P(text=col))
                     odf_row.addElement(cell)
+
+
+def detect(stream):
+    if isinstance(stream, bytes):
+        # opendocument.load() expects a file-like object, not raw bytes.
+        stream = BytesIO(stream)
+    try:
+        opendocument.load(stream)
+        return True
+    except Exception:
+        return False
diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py
index baa7904..88e8636 100644
--- a/tablib/formats/_xls.py
+++ b/tablib/formats/_xls.py
@@ -25,17 +25,17 @@ def detect(stream):
     try:
         xlrd.open_workbook(file_contents=stream)
         return True
-    except (TypeError, XLRDError):
+    except Exception:
         pass
     try:
         xlrd.open_workbook(file_contents=stream.read())
         return True
-    except (AttributeError, XLRDError):
+    except Exception:
         pass
     try:
         xlrd.open_workbook(filename=stream)
         return True
-    except:
+    except Exception:
         return False
 
 
diff --git a/tablib/formats/_xlsx.py b/tablib/formats/_xlsx.py
index 516191c..f8f21c2 100644
--- a/tablib/formats/_xlsx.py
+++ b/tablib/formats/_xlsx.py
@@ -28,8 +28,8 @@ def detect(stream):
     try:
         openpyxl.reader.excel.load_workbook(stream, read_only=True)
         return True
-    except openpyxl.shared.exc.InvalidFileException:
-        pass
+    except Exception:
+        return False
 
 def export_set(dataset, freeze_panes=True):
     """Returns XLSX representation of Dataset."""
diff --git a/test_tablib.py b/test_tablib.py
index f15724d..216ca88 100755
--- a/test_tablib.py
+++ b/test_tablib.py
@@ -288,17 +288,33 @@ class TablibTestCase(BaseTestCase):
 
     def test_auto_format_detect(self):
         """Test auto format detection."""
+        # html, jira, latex, rst are export only.
+
+        _xls = self.founders.export('xls')
+        self.assertEqual(tablib.detect_format(_xls), 'xls')
+
+        _xlsx = self.founders.export('xlsx')
+        self.assertEqual(tablib.detect_format(_xlsx), 'xlsx')
+
+        _ods = self.founders.export('ods')
+        self.assertEqual(tablib.detect_format(_ods), 'ods')
+
+        _df = self.founders.export('df')
+        self.assertEqual(tablib.detect_format(_df), 'df')
 
         _yaml = '- {age: 90, first_name: John, last_name: Adams}'
+        self.assertEqual(tablib.detect_format(_yaml), 'yaml')
+
         _json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]'
-        _csv = '1,2,3\n4,5,6\n7,8,9\n'
-        _tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
-        _bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
+        self.assertEqual(tablib.detect_format(_json), 'json')
 
-        self.assertEqual(tablib.detect_format(_yaml), 'yaml')
+        _csv = '1,2,3\n4,5,6\n7,8,9\n'
         self.assertEqual(tablib.detect_format(_csv), 'csv')
+
+        _tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
         self.assertEqual(tablib.detect_format(_tsv), 'tsv')
-        self.assertEqual(tablib.detect_format(_json), 'json')
+
+        _bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
         self.assertEqual(tablib.detect_format(_bunk), None)
 
     def test_transpose(self):
author	Claude Paroz <claude@2xlibre.net>	2019-10-04 23:23:43 +0200
committer	Claude Paroz <claude@2xlibre.net>	2019-10-04 23:40:24 +0200
commit	ca8dbcf9bea46607d22fe205498b4c071c705860 (patch)
tree	dd685c19557a42b7655898b2bcf11603f653dbea
parent	44185350306c1cbc9a85b5682b4cb5d69a663494 (diff)
download	tablib-ca8dbcf9bea46607d22fe205498b4c071c705860.tar.gz