From 96628d39efe4595cc320811efc500f1c38ed1d55 Mon Sep 17 00:00:00 2001
From: Stuart Rackham <srackham@methods.co.nz>
Date: Mon, 18 Oct 2010 12:46:10 +1300
Subject: FIXED: ``UnicodeDecodeError: 'ascii' codec can't decode byte''.  This
 error is due to a limitation in the Python HTMLParser module, see:
 http://bugs.python.org/issue3932 Resolved by assuming UTF-8 input (this
 assumption is not always going to be correct, but it will be correct more often
 than not doing anything and assuming an ASCII encoding).

---
 a2x.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'a2x.py')

diff --git a/a2x.py b/a2x.py
index cd95c99..8ec66c4 100755
--- a/a2x.py
+++ b/a2x.py
@@ -237,7 +237,9 @@ def find_resources(files, tagname, attrname, filter=None):
         if OPTIONS.dry_run:
             continue
         parser = FindResources()
-        parser.feed(open(f).read())
+        # UTF-8 is a better bet than the default ASCII.
+        # See http://bugs.python.org/issue3932
+        parser.feed(open(f).read().decode('utf8'))
         parser.close()
     result = list(set(result))   # Drop duplicate values.
     result.sort()
-- 
cgit v1.2.1