diff options
author | Claude Paroz <claude@2xlibre.net> | 2019-10-04 23:23:43 +0200 |
---|---|---|
committer | Claude Paroz <claude@2xlibre.net> | 2019-10-04 23:40:24 +0200 |
commit | ca8dbcf9bea46607d22fe205498b4c071c705860 (patch) | |
tree | dd685c19557a42b7655898b2bcf11603f653dbea | |
parent | 44185350306c1cbc9a85b5682b4cb5d69a663494 (diff) | |
download | tablib-ca8dbcf9bea46607d22fe205498b4c071c705860.tar.gz |
Refs #108 - Test and improve format autodetection
Autodetection was added for the odf format.
-rw-r--r-- | HISTORY.md | 1 | ||||
-rw-r--r-- | tablib/formats/_csv.py | 2 | ||||
-rw-r--r-- | tablib/formats/_dbf.py | 6 | ||||
-rw-r--r-- | tablib/formats/_ods.py | 11 | ||||
-rw-r--r-- | tablib/formats/_xls.py | 6 | ||||
-rw-r--r-- | tablib/formats/_xlsx.py | 4 | ||||
-rwxr-xr-x | test_tablib.py | 26 |
7 files changed, 40 insertions, 16 deletions
```diff
@@ -7,6 +7,7 @@
 - Fixed a regression for xlsx exports where non-string values were forced to
   strings (#314).
 - Fixed xlsx format detection (which was often detected as `xls` format).
+- Improved format autodetection and added autodetection for the odf format.
 - Added search to all documentation pages
 - Open xlsx workbooks in read-only mode (#316)
 - Unpin requirements
diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py
index 9e8515a..5c03d6f 100644
--- a/tablib/formats/_csv.py
+++ b/tablib/formats/_csv.py
@@ -55,5 +55,5 @@ def detect(stream, delimiter=DEFAULT_DELIMITER):
     try:
         csv.Sniffer().sniff(stream, delimiters=delimiter)
         return True
-    except (csv.Error, TypeError):
+    except Exception:
         return False
diff --git a/tablib/formats/_dbf.py b/tablib/formats/_dbf.py
index 710797d..0d1c87b 100644
--- a/tablib/formats/_dbf.py
+++ b/tablib/formats/_dbf.py
@@ -83,9 +83,5 @@ def detect(stream):
         else:
             _dbf = dbf.Dbf(StringIO(stream), readOnly=True)
         return True
-    except (ValueError, struct.error):
-        # When we try to open up a file that's not a DBF, dbfpy raises a
-        # ValueError.
-        # When unpacking a string argument with less than 8 chars, struct.error is
-        # raised.
+    except Exception:
         return False
diff --git a/tablib/formats/_ods.py b/tablib/formats/_ods.py
index 5b900b5..dbf57c4 100644
--- a/tablib/formats/_ods.py
+++ b/tablib/formats/_ods.py
@@ -91,3 +91,14 @@ def dset_sheet(dataset, ws):
                     cell = table.TableCell()
                     cell.addElement(text.P(text=col))
                     odf_row.addElement(cell)
+
+
+def detect(stream):
+    if isinstance(stream, bytes):
+        # load expects a file-like object.
+        stream = BytesIO(stream)
+    try:
+        opendocument.load(stream)
+        return True
+    except Exception:
+        return False
diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py
index baa7904..88e8636 100644
--- a/tablib/formats/_xls.py
+++ b/tablib/formats/_xls.py
@@ -25,17 +25,17 @@ def detect(stream):
     try:
         xlrd.open_workbook(file_contents=stream)
         return True
-    except (TypeError, XLRDError):
+    except Exception:
         pass
     try:
         xlrd.open_workbook(file_contents=stream.read())
         return True
-    except (AttributeError, XLRDError):
+    except Exception:
         pass
     try:
         xlrd.open_workbook(filename=stream)
         return True
-    except:
+    except Exception:
         return False
 
 
diff --git a/tablib/formats/_xlsx.py b/tablib/formats/_xlsx.py
index 516191c..f8f21c2 100644
--- a/tablib/formats/_xlsx.py
+++ b/tablib/formats/_xlsx.py
@@ -28,8 +28,8 @@ def detect(stream):
     try:
         openpyxl.reader.excel.load_workbook(stream, read_only=True)
         return True
-    except openpyxl.shared.exc.InvalidFileException:
-        pass
+    except Exception:
+        return False
 
 def export_set(dataset, freeze_panes=True):
     """Returns XLSX representation of Dataset."""
diff --git a/test_tablib.py b/test_tablib.py
index f15724d..216ca88 100755
--- a/test_tablib.py
+++ b/test_tablib.py
@@ -288,17 +288,33 @@ class TablibTestCase(BaseTestCase):
 
     def test_auto_format_detect(self):
         """Test auto format detection."""
+        # html, jira, latex, rst are export only.
+
+        _xls = self.founders.export('xls')
+        self.assertEqual(tablib.detect_format(_xls), 'xls')
+
+        _xlsx = self.founders.export('xlsx')
+        self.assertEqual(tablib.detect_format(_xlsx), 'xlsx')
+
+        _ods = self.founders.export('ods')
+        self.assertEqual(tablib.detect_format(_ods), 'ods')
+
+        _df = self.founders.export('df')
+        self.assertEqual(tablib.detect_format(_df), 'df')
 
         _yaml = '- {age: 90, first_name: John, last_name: Adams}'
+        self.assertEqual(tablib.detect_format(_yaml), 'yaml')
+
         _json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]'
-        _csv = '1,2,3\n4,5,6\n7,8,9\n'
-        _tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
-        _bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
+        self.assertEqual(tablib.detect_format(_json), 'json')
 
-        self.assertEqual(tablib.detect_format(_yaml), 'yaml')
+        _csv = '1,2,3\n4,5,6\n7,8,9\n'
         self.assertEqual(tablib.detect_format(_csv), 'csv')
+
+        _tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
         self.assertEqual(tablib.detect_format(_tsv), 'tsv')
-        self.assertEqual(tablib.detect_format(_json), 'json')
+
+        _bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
         self.assertEqual(tablib.detect_format(_bunk), None)
 
     def test_transpose(self):
```