diff options
author | Paul McGuire <ptmcg@austin.rr.com> | 2015-11-25 19:53:19 +0000 |
---|---|---|
committer | Paul McGuire <ptmcg@austin.rr.com> | 2015-11-25 19:53:19 +0000 |
commit | 7ae4f0cb4ae5360ce12c14032c314d82e1d206b8 (patch) | |
tree | b989cf797f6faca30639ba3acc2204c706cdc1b3 /src/pyparsing.py | |
parent | 30e376729d795149fd9ddfb90cceda4fadf71f1e (diff) | |
download | pyparsing-git-7ae4f0cb4ae5360ce12c14032c314d82e1d206b8.tar.gz |
Cleaned up additional issues from enhancing the error messages for Or and MatchFirst, handling Unicode values in expressions. Fixes Unicode encoding issues in Python 2.
Diffstat (limited to 'src/pyparsing.py')
-rw-r--r-- | src/pyparsing.py | 19 |
1 file changed, 6 insertions, 13 deletions
diff --git a/src/pyparsing.py b/src/pyparsing.py
index 186bc45..f30feb9 100644
--- a/src/pyparsing.py
+++ b/src/pyparsing.py
@@ -123,18 +123,11 @@ else:
return str(obj)
except UnicodeEncodeError:
- # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
- # state that "The return value must be a string object". However, does a
- # unicode object (being a subclass of basestring) count as a "string
- # object"?
- # If so, then return a unicode object:
- return unicode(obj)
- # Else encode it... but how? There are many choices... :)
- # Replace unprintables with escape codes?
- #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
- # Replace unprintables with question marks?
- #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
- # ...
+ # Else encode it
+ ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
+ xmlcharref = Regex('&#\d+;')
+ xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
+ return xmlcharref.transformString(ret)
# build list of single arg builtins, tolerant of Python version, that can be used as parse actions
singleArgBuiltins = []
@@ -2351,7 +2344,7 @@ class ParseExpression(ParserElement):
self.mayReturnEmpty |= other.mayReturnEmpty
self.mayIndexError |= other.mayIndexError
- self.errmsg = "Expected " + str(self)
+ self.errmsg = "Expected " + _ustr(self)
return self
|