From 96628d39efe4595cc320811efc500f1c38ed1d55 Mon Sep 17 00:00:00 2001 From: Stuart Rackham Date: Mon, 18 Oct 2010 12:46:10 +1300 Subject: FIXED: ``UnicodeDecodeError: 'ascii' codec can't decode byte''. This error is due to a limitation in the Python HTMLParser module, see: http://bugs.python.org/issue3932 Resolved by assuming UTF-8 input (this assumption is not always going to be correct but it will be correct more often than not doing anything and assuming an ascii encoding). --- a2x.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'a2x.py') diff --git a/a2x.py b/a2x.py index cd95c99..8ec66c4 100755 --- a/a2x.py +++ b/a2x.py @@ -237,7 +237,9 @@ def find_resources(files, tagname, attrname, filter=None): if OPTIONS.dry_run: continue parser = FindResources() - parser.feed(open(f).read()) + # UTF-8 is a better bet than the default ASCII. + # See http://bugs.python.org/issue3932 + parser.feed(open(f).read().decode('utf8')) parser.close() result = list(set(result)) # Drop duplicate values. result.sort() -- cgit v1.2.1