summary | refs | log | tree | commit | diff
path: root/src/pyparsing.py
diff options
context:
space:
mode:
author Paul McGuire <ptmcg@austin.rr.com> 2015-11-25 19:53:19 +0000
committer Paul McGuire <ptmcg@austin.rr.com> 2015-11-25 19:53:19 +0000
commit 7ae4f0cb4ae5360ce12c14032c314d82e1d206b8 (patch)
tree b989cf797f6faca30639ba3acc2204c706cdc1b3 /src/pyparsing.py
parent 30e376729d795149fd9ddfb90cceda4fadf71f1e (diff)
download pyparsing-git-7ae4f0cb4ae5360ce12c14032c314d82e1d206b8.tar.gz
Cleaned up additional issues from enhancing the error messages for Or and MatchFirst, handling Unicode values in expressions. Fixes Unicode encoding issues in Python 2.
Diffstat (limited to 'src/pyparsing.py')
-rw-r--r-- src/pyparsing.py 19
1 files changed, 6 insertions, 13 deletions
diff --git a/src/pyparsing.py b/src/pyparsing.py
index 186bc45..f30feb9 100644
--- a/src/pyparsing.py
+++ b/src/pyparsing.py
@@ -123,18 +123,11 @@ else:
return str(obj)
except UnicodeEncodeError:
- # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
- # state that "The return value must be a string object". However, does a
- # unicode object (being a subclass of basestring) count as a "string
- # object"?
- # If so, then return a unicode object:
- return unicode(obj)
- # Else encode it... but how? There are many choices... :)
- # Replace unprintables with escape codes?
- #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
- # Replace unprintables with question marks?
- #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
- # ...
+ # Else encode it
+ ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
+ xmlcharref = Regex('&#\d+;')
+ xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
+ return xmlcharref.transformString(ret)
# build list of single arg builtins, tolerant of Python version, that can be used as parse actions
singleArgBuiltins = []
@@ -2351,7 +2344,7 @@ class ParseExpression(ParserElement):
self.mayReturnEmpty |= other.mayReturnEmpty
self.mayIndexError |= other.mayIndexError
- self.errmsg = "Expected " + str(self)
+ self.errmsg = "Expected " + _ustr(self)
return self