diff options
author | Paul McGuire <ptmcg@austin.rr.com> | 2019-10-13 01:06:22 -0500 |
---|---|---|
committer | Paul McGuire <ptmcg@austin.rr.com> | 2019-10-13 01:06:22 -0500 |
commit | 719c9a50d9995f067311a3dee5e19c2354df1049 (patch) | |
tree | b617fa24c3df4cb0c23900defe7fc9e3db2d667d | |
parent | 417332636f38ff6afb966fa63c2f8fe341ca6b4d (diff) | |
download | pyparsing-git-719c9a50d9995f067311a3dee5e19c2354df1049.tar.gz |
Add support for dynamically replacing pyparsing's internal use of the stdlib re module with the regex module or another re-compatible module
-rw-r--r-- | CHANGES | 32 |
-rw-r--r-- | pyparsing.py | 30 |
2 files changed, 54 insertions, 8 deletions
@@ -73,13 +73,41 @@ Version 3.0.0a1 pp.__diag__.enable_all_warnings() -- New namespace, assert methods and classes added to support writing unit tests. +- New namespace, assert methods and classes added to support writing + unit tests. - assertParseResultsEquals - assertParseAndCheckList - assertParseAndCheckDict - assertRunTestResults - assertRaisesParseException - - reset_pyparsing_context context manager, to restore pyparsing config settings + - reset_pyparsing_context context manager, to restore pyparsing + config settings + +- Enhanced the Regex class to be compatible with re's compiled with the + re-equivalent regex module. Individual expressions can be built with + regex compiled expressions using: + + import pyparsing as pp + import regex + + # would use regex for this expression + integer_parser = pp.Regex(regex.compile(r'\d+')) + + You can also replace the use of the re module as it is used internally + by pyparsing in a number of classes by overwriting pyparsing's imported + re symbol: + + import pyparsing as pp + import regex + pp.re = regex # redirects all internal re usage in pyparsing to regex + + # would now use regex instead of re to compile this string + integer_parser = pp.Regex(r'\d+') + + # would also now use regex internally instead of re + integer_parser = pp.Word(pp.nums) + + Inspired by PR submitted by bjrnfrdnnd on GitHub, very nice! - Fixed handling of ParseSyntaxExceptions raised as part of Each expressions, when sub-expressions contain '-' backtrack diff --git a/pyparsing.py b/pyparsing.py index 6c3c7ec..48720f0 100644 --- a/pyparsing.py +++ b/pyparsing.py @@ -96,7 +96,7 @@ classes inherit from. 
Use the docstrings for examples of how to: """ __version__ = "3.0.0a1" -__versionTime__ = "13 Oct 2019 05:28 UTC" +__versionTime__ = "13 Oct 2019 05:49 UTC" __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" import string @@ -219,7 +219,7 @@ __all__ = ['__version__', '__versionTime__', '__author__', '__compat__', '__diag 'stringStart', 'traceParseAction', 'unicodeString', 'withAttribute', 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', 'locatedExpr', 'withClass', 'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set', - 'conditionAsParseAction', 'pyparsing_test', + 'conditionAsParseAction', 'pyparsing_test', 're', ] system_version = tuple(sys.version_info)[:3] @@ -3065,14 +3065,32 @@ class Regex(Token): If the given regex contains named groups (defined using ``(?P<name>...)``), these will be preserved as named parse results. + If instead of the Python stdlib re module you wish to use a different RE module + (such as the `regex` module), you can replace it by either building your + Regex object with a compiled RE that was compiled using regex, or by replacing + the imported `re` module in pyparsing with the `regex` module: + + Example:: realnum = Regex(r"[+-]?\d+\.\d*") date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)') # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression roman = Regex(r"M{0,4}(CM|CD|D?{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})") + + import regex + parser = pp.Regex(regex.compile(r'[0-9]')) + + # or + + import pyparsing + pyparsing.re = regex + + # both of these will use the regex module to compile their internal re's + parser = pp.Regex(r'[0-9]') + parser = pp.Word(pp.nums) + """ - compiledREtype = type(re.compile("[A-Z]")) def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False): """The parameters ``pattern`` and ``flags`` are passed to the ``re.compile()`` function as-is. 
See the Python @@ -3097,13 +3115,13 @@ class Regex(Token): SyntaxWarning, stacklevel=2) raise - elif isinstance(pattern, Regex.compiledREtype): + elif hasattr(pattern, 'pattern') and hasattr(pattern, 'match'): self.re = pattern - self.pattern = self.reString = str(pattern) + self.pattern = self.reString = pattern.pattern self.flags = flags else: - raise ValueError("Regex may only be constructed with a string or a compiled RE object") + raise TypeError("Regex may only be constructed with a string or a compiled RE object") self.re_match = self.re.match |