diff options
author | ptmcg <ptmcg@austin.rr.com> | 2020-06-24 00:29:53 -0500 |
---|---|---|
committer | ptmcg <ptmcg@austin.rr.com> | 2020-06-24 00:29:53 -0500 |
commit | e91acdf0d3e405ce4b02d2c4f27c51e223a01b59 (patch) | |
tree | b4a9ec95c3d42108318979c6f85380a3d7a4efdd | |
parent | 60285bccb6e40a028b6c0a721e9af541b7b4b11c (diff) | |
download | pyparsing-git-e91acdf0d3e405ce4b02d2c4f27c51e223a01b59.tar.gz |
Follow-up to default vs custom name tracking, from Issue #223
-rw-r--r-- | pyparsing/core.py | 221 | ||||
-rw-r--r-- | tests/test_unit.py | 36 |
2 files changed, 132 insertions, 125 deletions
diff --git a/pyparsing/core.py b/pyparsing/core.py index 1f02985..afef674 100644 --- a/pyparsing/core.py +++ b/pyparsing/core.py @@ -1,9 +1,9 @@ # # core.py # +from abc import ABC, abstractmethod import string import copy -import sys import warnings import re import sre_constants @@ -250,7 +250,7 @@ def nullDebugAction(*args): pass -class ParserElement: +class ParserElement(ABC): """Abstract base level parser element class.""" DEFAULT_WHITE_CHARS = " \n\t\r" @@ -308,8 +308,8 @@ class ParserElement: def __init__(self, savelist=False): self.parseAction = list() self.failAction = None - self.name = None - self.strRepr = None + self.customName = None + self._defaultName = None self.resultsName = None self.saveAsList = savelist self.skipWhitespace = True @@ -360,21 +360,6 @@ class ParserElement: cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS return cpy - def setName(self, name): - """ - Define name for this expression, makes debugging and exception messages clearer. - - Example:: - - Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1) - Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) - """ - self.name = name - self.errmsg = "Expected " + self.name - if __diag__.enable_debug_on_named_expressions: - self.setDebug() - return self - def setResultsName(self, name, listAllMatches=False): """ Define name for referencing matching tokens as a nested attribute @@ -1476,32 +1461,50 @@ class ParserElement: self.debug = False return self - def _make_str_repr(self): - raise NotImplemented + @property + def defaultName(self): + if self._defaultName is None: + self._defaultName = self._generateDefaultName() + return self._defaultName + + @abstractmethod + def _generateDefaultName(self): + """ + Child classes must define this method, which defines how the `defaultName` is set. + """ + pass + + def setName(self, name): + self.customName = name + self.errmsg = "Expected " + self.name + if __diag__.enable_debug_on_named_expressions: + self.setDebug() + return self + + @property + def name(self): + # This will use a user-defined name if available, but otherwise defaults back to the auto-generated name + return self.customName if self.customName is not None else self.defaultName def __str__(self): - if self.name is not None: - return self.name - if self.strRepr is None: - self.strRepr = self._make_str_repr() - return self.strRepr + return self.name def __repr__(self): return str(self) def streamline(self): self.streamlined = True - self.strRepr = None + self._defaultName = None return self - def checkRecursion(self, parseElementList): + def _checkRecursion(self, parseElementList): pass def validate(self, validateTrace=None): """ Check defined expressions for valid structure, check for infinite recursive definitions. """ - self.checkRecursion([]) + self._checkRecursion([]) def parseFile(self, file_or_filename, parseAll=False): """ @@ -1735,11 +1738,12 @@ class _PendingSkip(ParserElement): # once another ParserElement is added, this placeholder will be replaced with a SkipTo def __init__(self, expr, must_skip=False): super().__init__() - self.strRepr = str(expr + Empty()).replace("Empty", "...") - self.name = self.strRepr self.anchor = expr self.must_skip = must_skip + def _generateDefaultName(self): + return str(self.anchor + Empty()).replace("Empty", "...") + def __add__(self, other): skipper = SkipTo(other).setName("...")("_skipped*") if self.must_skip: @@ -1762,7 +1766,7 @@ class _PendingSkip(ParserElement): return self.anchor + skipper + other def __repr__(self): - return self.strRepr + return self.defaultName def parseImpl(self, *args): raise Exception( @@ -1778,6 +1782,9 @@ class Token(ParserElement): def __init__(self): super().__init__(savelist=False) + def _generateDefaultName(self): + return type(self).__name__ + class Empty(Token): """An empty token, will always match. @@ -1785,7 +1792,6 @@ class Empty(Token): def __init__(self): super().__init__() - self.name = "Empty" self.mayReturnEmpty = True self.mayIndexError = False @@ -1796,7 +1802,6 @@ class NoMatch(Token): def __init__(self): super().__init__() - self.name = "NoMatch" self.mayReturnEmpty = True self.mayIndexError = False self.errmsg = "Unmatchable token" @@ -1833,7 +1838,6 @@ class Literal(Token): stacklevel=2, ) self.__class__ = Empty - self.name = '"%s"' % str(self.match) self.errmsg = "Expected " + self.name self.mayReturnEmpty = False self.mayIndexError = False @@ -1843,6 +1847,9 @@ class Literal(Token): if self.matchLen == 1 and type(self) is Literal: self.__class__ = _SingleCharLiteral + def _generateDefaultName(self): + return repr(self.match) + def parseImpl(self, instring, loc, doActions=True): if instring[loc] == self.firstMatchChar and instring.startswith( self.match, loc @@ -1903,7 +1910,6 @@ class Keyword(Token): SyntaxWarning, stacklevel=2, ) - self.name = '"%s"' % self.match self.errmsg = "Expected {} {}".format(type(self).__name__, self.name) self.mayReturnEmpty = False self.mayIndexError = False @@ -1913,6 +1919,9 @@ class Keyword(Token): identChars = identChars.upper() self.identChars = set(identChars) + def _generateDefaultName(self): + return repr(self.match) + def parseImpl(self, instring, loc, doActions=True): errmsg = self.errmsg errloc = loc @@ -1989,7 +1998,6 @@ class CaselessLiteral(Literal): super().__init__(matchString.upper()) # Preserve the defining literal. self.returnString = matchString - self.name = "'%s'" % self.returnString self.errmsg = "Expected " + self.name def parseImpl(self, instring, loc, doActions=True): @@ -2050,7 +2058,6 @@ class CloseMatch(Token): def __init__(self, match_string, maxMismatches=1): super().__init__() - self.name = match_string self.match_string = match_string self.maxMismatches = maxMismatches self.errmsg = "Expected %r (with up to %d mismatches)" % ( @@ -2060,6 +2067,9 @@ class CloseMatch(Token): self.mayIndexError = False self.mayReturnEmpty = False + def _generateDefaultName(self): + return "{}:{!r}".format(type(self).__name__, self.match_string_) + def parseImpl(self, instring, loc, doActions=True): start = loc instrlen = len(instring) @@ -2193,7 +2203,6 @@ class Word(Token): self.maxLen = exact self.minLen = exact - self.name = str(self) self.errmsg = "Expected " + self.name self.mayIndexError = False self.asKeyword = asKeyword @@ -2224,6 +2233,22 @@ class Word(Token): self.re_match = self.re.match self.__class__ = _WordRegex + def _generateDefaultName(self): + def charsAsStr(s): + max_repr_len = 16 + s = _collapseStringToRanges(s) + if len(s) > max_repr_len: + return s[: max_repr_len - 3] + "..." + else: + return s + + if self.initCharsOrig != self.bodyCharsOrig: + return "W:({}, {})".format( + charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig), + ) + else: + return "W:({})".format(charsAsStr(self.initCharsOrig)) + def parseImpl(self, instring, loc, doActions=True): if instring[loc] not in self.initChars: raise ParseException(instring, loc, self.errmsg, self) @@ -2256,22 +2281,6 @@ class Word(Token): return loc, instring[start:loc] - def _make_str_repr(self): - def charsAsStr(s): - max_repr_len = 16 - s = _collapseStringToRanges(s) - if len(s) > max_repr_len: - return s[: max_repr_len - 3] + "..." - else: - return s - - if self.initCharsOrig != self.bodyCharsOrig: - return "W:({}, {})".format( - charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig), - ) - else: - return "W:({})".format(charsAsStr(self.initCharsOrig)) - class _WordRegex(Word): def parseImpl(self, instring, loc, doActions=True): @@ -2367,7 +2376,6 @@ class Regex(Token): self.re_match = self.re.match - self.name = str(self) self.errmsg = "Expected " + self.name self.mayIndexError = False self.mayReturnEmpty = self.re_match("") is not None @@ -2378,6 +2386,9 @@ class Regex(Token): if self.asMatch: self.parseImpl = self.parseImplAsMatch + def _generateDefaultName(self): + return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\")) + def parseImpl(self, instring, loc, doActions=True): result = self.re_match(instring, loc) if not result: @@ -2409,9 +2420,6 @@ class Regex(Token): ret = result return loc, ret - def _make_str_repr(self): - return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\")) - def sub(self, repl): r""" Return :class:`Regex` with an attached parse action to transform the parsed @@ -2581,11 +2589,15 @@ class QuotedString(Token): ) raise - self.name = str(self) self.errmsg = "Expected " + self.name self.mayIndexError = False self.mayReturnEmpty = True + def _generateDefaultName(self): + return "quoted string, starting with %s ending with {}".format( + self.quoteChar, self.endQuoteChar, + ) + def parseImpl(self, instring, loc, doActions=True): result = ( instring[loc] == self.firstQuoteChar @@ -2625,11 +2637,6 @@ class QuotedString(Token): return loc, ret - def _make_str_repr(self): - return "quoted string, starting with %s ending with {}".format( - self.quoteChar, self.endQuoteChar, - ) - class CharsNotIn(Token): """Token for matching words composed of characters *not* in a given @@ -2674,11 +2681,17 @@ class CharsNotIn(Token): self.maxLen = exact self.minLen = exact - self.name = str(self) self.errmsg = "Expected " + self.name self.mayReturnEmpty = self.minLen == 0 self.mayIndexError = False + def _generateDefaultName(self): + not_chars_str = _collapseStringToRanges(self.notChars) + if len(not_chars_str) > 16: + return "!W:({}...)".format(self.notChars[: 16 - 3]) + else: + return "!W:({})".format(self.notChars) + def parseImpl(self, instring, loc, doActions=True): if instring[loc] in self.notChars: raise ParseException(instring, loc, self.errmsg, self) @@ -2695,13 +2708,6 @@ class CharsNotIn(Token): return loc, instring[start:loc] - def _make_str_repr(self): - not_chars_str = _collapseStringToRanges(self.notChars) - if len(not_chars_str) > 16: - return "!W:({}...)".format(self.notChars[: 16 - 3]) - else: - return "!W:({})".format(self.notChars) - class White(Token): """Special matching class for matching whitespace. Normally, @@ -2747,7 +2753,6 @@ class White(Token): copy_defaults=True, ) # self.leaveWhitespace() - self.name = "".join(White.whiteStrs[c] for c in self.matchWhite) self.mayReturnEmpty = True self.errmsg = "Expected " + self.name @@ -2762,6 +2767,9 @@ class White(Token): self.maxLen = exact self.minLen = exact + def _generateDefaultName(self): + return "".join(White.whiteStrs[c] for c in self.matchWhite) + def parseImpl(self, instring, loc, doActions=True): if instring[loc] not in self.matchWhite: raise ParseException(instring, loc, self.errmsg, self) @@ -2781,7 +2789,6 @@ class White(Token): class _PositionToken(Token): def __init__(self): super().__init__() - self.name = self.__class__.__name__ self.mayReturnEmpty = True self.mayIndexError = False @@ -2996,7 +3003,7 @@ class ParseExpression(ParserElement): def append(self, other): self.exprs.append(other) - self.strRepr = None + self._defaultName = None return self def leaveWhitespace(self, recursive=True): @@ -3036,7 +3043,7 @@ class ParseExpression(ParserElement): e.ignore(self.ignoreExprs[-1]) return self - def _make_str_repr(self): + def _generateDefaultName(self): return "{}:({})".format(self.__class__.__name__, str(self.exprs)) def streamline(self): @@ -3057,7 +3064,7 @@ class ParseExpression(ParserElement): and not other.debug ): self.exprs = other.exprs[:] + [self.exprs[1]] - self.strRepr = None + self._defaultName = None self.mayReturnEmpty |= other.mayReturnEmpty self.mayIndexError |= other.mayIndexError @@ -3069,7 +3076,7 @@ class ParseExpression(ParserElement): and not other.debug ): self.exprs = self.exprs[:-1] + other.exprs[:] - self.strRepr = None + self._defaultName = None self.mayReturnEmpty |= other.mayReturnEmpty self.mayIndexError |= other.mayIndexError @@ -3081,7 +3088,7 @@ class ParseExpression(ParserElement): tmp = (validateTrace if validateTrace is not None else [])[:] + [self] for e in self.exprs: e.validate(tmp) - self.checkRecursion([]) + self._checkRecursion([]) def copy(self): ret = super().copy() @@ -3127,9 +3134,11 @@ class And(ParseExpression): class _ErrorStop(Empty): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.name = "-" self.leaveWhitespace() + def _generateDefaultName(self): + return "-" + def __init__(self, exprs, savelist=True): exprs = list(exprs) if exprs and Ellipsis in exprs: @@ -3213,14 +3222,14 @@ class And(ParseExpression): other = self._literalStringClass(other) return self.append(other) # And([self, other]) - def checkRecursion(self, parseElementList): + def _checkRecursion(self, parseElementList): subRecCheckList = parseElementList[:] + [self] for e in self.exprs: - e.checkRecursion(subRecCheckList) + e._checkRecursion(subRecCheckList) if not e.mayReturnEmpty: break - def _make_str_repr(self): + def _generateDefaultName(self): return "{" + " ".join(str(e) for e in self.exprs) + "}" @@ -3339,13 +3348,13 @@ class Or(ParseExpression): other = self._literalStringClass(other) return self.append(other) # Or([self, other]) - def _make_str_repr(self): + def _generateDefaultName(self): return "{" + " ^ ".join(str(e) for e in self.exprs) + "}" - def checkRecursion(self, parseElementList): + def _checkRecursion(self, parseElementList): subRecCheckList = parseElementList[:] + [self] for e in self.exprs: - e.checkRecursion(subRecCheckList) + e._checkRecursion(subRecCheckList) def _setResultsName(self, name, listAllMatches=False): if __diag__.warn_multiple_tokens_in_named_alternation: @@ -3440,13 +3449,13 @@ class MatchFirst(ParseExpression): other = self._literalStringClass(other) return self.append(other) # MatchFirst([self, other]) - def _make_str_repr(self): + def _generateDefaultName(self): return "{" + " | ".join(str(e) for e in self.exprs) + "}" - def checkRecursion(self, parseElementList): + def _checkRecursion(self, parseElementList): subRecCheckList = parseElementList[:] + [self] for e in self.exprs: - e.checkRecursion(subRecCheckList) + e._checkRecursion(subRecCheckList) def _setResultsName(self, name, listAllMatches=False): if __diag__.warn_multiple_tokens_in_named_alternation: @@ -3619,13 +3628,13 @@ class Each(ParseExpression): finalResults = sum(resultlist, ParseResults([])) return loc, finalResults - def _make_str_repr(self): + def _generateDefaultName(self): return "{" + " & ".join(str(e) for e in self.exprs) + "}" - def checkRecursion(self, parseElementList): + def _checkRecursion(self, parseElementList): subRecCheckList = parseElementList[:] + [self] for e in self.exprs: - e.checkRecursion(subRecCheckList) + e._checkRecursion(subRecCheckList) class ParseElementEnhance(ParserElement): @@ -3643,7 +3652,6 @@ class ParseElementEnhance(ParserElement): else: expr = self._literalStringClass(Literal(expr)) self.expr = expr - self.strRepr = None if expr is not None: self.mayIndexError = expr.mayIndexError self.mayReturnEmpty = expr.mayReturnEmpty @@ -3700,12 +3708,12 @@ class ParseElementEnhance(ParserElement): self.expr.streamline() return self - def checkRecursion(self, parseElementList): + def _checkRecursion(self, parseElementList): if self in parseElementList: raise RecursiveGrammarException(parseElementList + [self]) subRecCheckList = parseElementList[:] + [self] if self.expr is not None: - self.expr.checkRecursion(subRecCheckList) + self.expr._checkRecursion(subRecCheckList) def validate(self, validateTrace=None): if validateTrace is None: @@ -3713,9 +3721,9 @@ class ParseElementEnhance(ParserElement): tmp = validateTrace[:] + [self] if self.expr is not None: self.expr.validate(tmp) - self.checkRecursion([]) + self._checkRecursion([]) - def _make_str_repr(self): + def _generateDefaultName(self): return "%s:(%s)" % (self.__class__.__name__, str(self.expr)) @@ -3872,7 +3880,7 @@ class NotAny(ParseElementEnhance): raise ParseException(instring, loc, self.errmsg, self) return loc, [] - def _make_str_repr(self): + def _generateDefaultName(self): return "~{" + str(self.expr) + "}" @@ -3964,7 +3972,7 @@ class OneOrMore(_MultipleMatch): (attr_expr * (1,)).parseString(text).pprint() """ - def _make_str_repr(self): + def _generateDefaultName(self): return "{" + str(self.expr) + "}..." @@ -3990,7 +3998,7 @@ class ZeroOrMore(_MultipleMatch): except (ParseException, IndexError): return loc, ParseResults([], name=self.resultsName) - def _make_str_repr(self): + def _generateDefaultName(self): return "[" + str(self.expr) + "]..." @@ -4062,7 +4070,7 @@ class Optional(ParseElementEnhance): tokens = [] return loc, tokens - def _make_str_repr(self): + def _generateDefaultName(self): return "[" + str(self.expr) + "]" @@ -4229,7 +4237,6 @@ class Forward(ParseElementEnhance): if isinstance(other, str_type): other = self._literalStringClass(other) self.expr = other - self.strRepr = None self.mayIndexError = self.expr.mayIndexError self.mayReturnEmpty = self.expr.mayReturnEmpty self.setWhitespaceChars( @@ -4300,11 +4307,11 @@ class Forward(ParseElementEnhance): tmp = validateTrace[:] + [self] if self.expr is not None: self.expr.validate(tmp) - self.checkRecursion([]) + self._checkRecursion([]) - def _make_str_repr(self): - # Avoid infinite recursion by setting a temporary strRepr - self.strRepr = ": ..." + def _generateDefaultName(self): + # Avoid infinite recursion by setting a temporary _defaultName + self._defaultName = ": ..." # Use the string representation of main expression. retString = "..." diff --git a/tests/test_unit.py b/tests/test_unit.py index 02449f7..39fc658 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -328,16 +328,16 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): len(flatten(iniData.asList())), "file %s not parsed correctly" % fnam, ) - for chk in resCheckList: + for chkkey, chkexpect in resCheckList: var = iniData - for attr in chk[0].split("."): + for attr in chkkey.split("."): var = getattr(var, attr) - print(chk[0], var, chk[1]) + print(chkkey, var, chkexpect) self.assertEqual( - chk[1], + chkexpect, var, - "ParseConfigFileTest: failed to parse ini {!r} as expected {}, found {}".format( - chk[0], chk[1], var + "ParseConfigFileTest: failed to parse ini {!r} as expected {!r}, found {}".format( + chkkey, chkexpect, var ), ) print("OK") @@ -788,7 +788,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): def testParseIDL(self): from examples import idlParse - def test(strng, numToks, errloc=0): + def test(strng, numToks, expectedErrloc=0): print(strng) try: bnf = idlParse.CORBA_IDL_BNF() @@ -814,10 +814,10 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): ), ) self.assertEqual( - errloc, + expectedErrloc, err.loc, "expected ParseException at %d, found exception at %d" - % (errloc, err.loc), + % (expectedErrloc, err.loc), ) test( @@ -2692,7 +2692,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): print(reversed_list) expected = ["5", "4", "3", "2", "1"] self.assertEqual( - reversed_list, expected, msg="issue calling reversed(ParseResults)" + expected, reversed_list, msg="issue calling reversed(ParseResults)" ) def testParseResultsValues(self): @@ -2705,7 +2705,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): print(values_set) expected = {"spam", "eggs"} self.assertEqual( - values_set, expected, msg="issue calling ParseResults.values()" + expected, values_set, msg="issue calling ParseResults.values()" ) def testParseResultsAppend(self): @@ -2923,7 +2923,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): expr = pp.Literal("A")("Achar") * ... res = expr.parseString("A") - self.assertEqual(res.asList(), ["A"], "expected expr * ... to match ZeroOrMore") + self.assertEqual(["A"], res.asList(), "expected expr * ... to match ZeroOrMore") print(res.dump()) def testUpcaseDowncaseUnicode(self): @@ -3767,8 +3767,8 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): res = gg.parseString(testString) print(list(map(str, res))) self.assertEqual( - list(map(str, res)), list(testString), + list(map(str, res)), "Failed to parse using variable length parse actions " "using class constructors as parse actions", ) @@ -4562,11 +4562,11 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): ), ( "options(100) step(100A)", - """Expected "Z", found 'A' (at char 21), (line:1, col:22)""", + """Expected 'Z', found 'A' (at char 21), (line:1, col:22)""", ), ( "options(100) step(22) step(100ZA)", - """Expected ")", found 'A' (at char 31), (line:1, col:32)""", + """Expected ')', found 'A' (at char 31), (line:1, col:32)""", ), ] test_lookup = dict(tests) @@ -4574,8 +4574,8 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): success, output = parser.runTests((t[0] for t in tests), failureTests=True) for test_str, result in output: self.assertEqual( - str(result), test_lookup[test_str], + str(result), "incorrect exception raised for test string {!r}".format(test_str), ) @@ -4837,7 +4837,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): z.parseString("b") except ParseException as pe: self.assertEqual( - r"""Expected {"a" | "ᄑ"}""", + r"""Expected {'a' | 'ᄑ'}""", pe.msg, "Invalid error message raised, got %r" % pe.msg, ) @@ -7618,7 +7618,7 @@ class Test2_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): print(result) self.assertEqual( - [result.date, result.num], expected, msg="issue with GoToColumn" + expected, [result.date, result.num], msg="issue with GoToColumn" ) # Column number does NOT match |