summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Claude Paroz <claude@2xlibre.net> 2019-10-04 23:23:43 +0200
committer: Claude Paroz <claude@2xlibre.net> 2019-10-04 23:40:24 +0200
commit: ca8dbcf9bea46607d22fe205498b4c071c705860 (patch)
tree: dd685c19557a42b7655898b2bcf11603f653dbea
parent: 44185350306c1cbc9a85b5682b4cb5d69a663494 (diff)
download: tablib-ca8dbcf9bea46607d22fe205498b4c071c705860.tar.gz
Refs #108 - Test and improve format autodetection
Autodetection was added for the odf format.
-rw-r--r-- HISTORY.md 1
-rw-r--r-- tablib/formats/_csv.py 2
-rw-r--r-- tablib/formats/_dbf.py 6
-rw-r--r-- tablib/formats/_ods.py 11
-rw-r--r-- tablib/formats/_xls.py 6
-rw-r--r-- tablib/formats/_xlsx.py 4
-rwxr-xr-x test_tablib.py 26
7 files changed, 40 insertions, 16 deletions
diff --git a/HISTORY.md b/HISTORY.md
index 2d92422..0450831 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -7,6 +7,7 @@
- Fixed a regression for xlsx exports where non-string values were forced to
strings (#314).
- Fixed xlsx format detection (which was often detected as `xls` format).
+- Improved format autodetection and added autodetection for the odf format.
- Added search to all documentation pages
- Open xlsx workbooks in read-only mode (#316)
- Unpin requirements
diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py
index 9e8515a..5c03d6f 100644
--- a/tablib/formats/_csv.py
+++ b/tablib/formats/_csv.py
@@ -55,5 +55,5 @@ def detect(stream, delimiter=DEFAULT_DELIMITER):
try:
csv.Sniffer().sniff(stream, delimiters=delimiter)
return True
- except (csv.Error, TypeError):
+ except Exception:
return False
diff --git a/tablib/formats/_dbf.py b/tablib/formats/_dbf.py
index 710797d..0d1c87b 100644
--- a/tablib/formats/_dbf.py
+++ b/tablib/formats/_dbf.py
@@ -83,9 +83,5 @@ def detect(stream):
else:
_dbf = dbf.Dbf(StringIO(stream), readOnly=True)
return True
- except (ValueError, struct.error):
- # When we try to open up a file that's not a DBF, dbfpy raises a
- # ValueError.
- # When unpacking a string argument with less than 8 chars, struct.error is
- # raised.
+ except Exception:
return False
diff --git a/tablib/formats/_ods.py b/tablib/formats/_ods.py
index 5b900b5..dbf57c4 100644
--- a/tablib/formats/_ods.py
+++ b/tablib/formats/_ods.py
@@ -91,3 +91,14 @@ def dset_sheet(dataset, ws):
cell = table.TableCell()
cell.addElement(text.P(text=col))
odf_row.addElement(cell)
+
+
+def detect(stream):
+ if isinstance(stream, bytes):
+ # load expects a file-like object.
+ stream = BytesIO(stream)
+ try:
+ opendocument.load(stream)
+ return True
+ except Exception:
+ return False
diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py
index baa7904..88e8636 100644
--- a/tablib/formats/_xls.py
+++ b/tablib/formats/_xls.py
@@ -25,17 +25,17 @@ def detect(stream):
try:
xlrd.open_workbook(file_contents=stream)
return True
- except (TypeError, XLRDError):
+ except Exception:
pass
try:
xlrd.open_workbook(file_contents=stream.read())
return True
- except (AttributeError, XLRDError):
+ except Exception:
pass
try:
xlrd.open_workbook(filename=stream)
return True
- except:
+ except Exception:
return False
diff --git a/tablib/formats/_xlsx.py b/tablib/formats/_xlsx.py
index 516191c..f8f21c2 100644
--- a/tablib/formats/_xlsx.py
+++ b/tablib/formats/_xlsx.py
@@ -28,8 +28,8 @@ def detect(stream):
try:
openpyxl.reader.excel.load_workbook(stream, read_only=True)
return True
- except openpyxl.shared.exc.InvalidFileException:
- pass
+ except Exception:
+ return False
def export_set(dataset, freeze_panes=True):
"""Returns XLSX representation of Dataset."""
diff --git a/test_tablib.py b/test_tablib.py
index f15724d..216ca88 100755
--- a/test_tablib.py
+++ b/test_tablib.py
@@ -288,17 +288,33 @@ class TablibTestCase(BaseTestCase):
def test_auto_format_detect(self):
"""Test auto format detection."""
+ # html, jira, latex, rst are export only.
+
+ _xls = self.founders.export('xls')
+ self.assertEqual(tablib.detect_format(_xls), 'xls')
+
+ _xlsx = self.founders.export('xlsx')
+ self.assertEqual(tablib.detect_format(_xlsx), 'xlsx')
+
+ _ods = self.founders.export('ods')
+ self.assertEqual(tablib.detect_format(_ods), 'ods')
+
+ _df = self.founders.export('df')
+ self.assertEqual(tablib.detect_format(_df), 'df')
_yaml = '- {age: 90, first_name: John, last_name: Adams}'
+ self.assertEqual(tablib.detect_format(_yaml), 'yaml')
+
_json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]'
- _csv = '1,2,3\n4,5,6\n7,8,9\n'
- _tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
- _bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
+ self.assertEqual(tablib.detect_format(_json), 'json')
- self.assertEqual(tablib.detect_format(_yaml), 'yaml')
+ _csv = '1,2,3\n4,5,6\n7,8,9\n'
self.assertEqual(tablib.detect_format(_csv), 'csv')
+
+ _tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
self.assertEqual(tablib.detect_format(_tsv), 'tsv')
- self.assertEqual(tablib.detect_format(_json), 'json')
+
+ _bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
self.assertEqual(tablib.detect_format(_bunk), None)
def test_transpose(self):