summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul McGuire <ptmcg@austin.rr.com>2019-10-13 01:06:22 -0500
committerPaul McGuire <ptmcg@austin.rr.com>2019-10-13 01:06:22 -0500
commit719c9a50d9995f067311a3dee5e19c2354df1049 (patch)
treeb617fa24c3df4cb0c23900defe7fc9e3db2d667d
parent417332636f38ff6afb966fa63c2f8fe341ca6b4d (diff)
downloadpyparsing-git-719c9a50d9995f067311a3dee5e19c2354df1049.tar.gz
Add support for dynamic overwrite of pyparsing's use of stdlib re module with regex or other RE-compatible module
-rw-r--r--CHANGES32
-rw-r--r--pyparsing.py30
2 files changed, 54 insertions, 8 deletions
diff --git a/CHANGES b/CHANGES
index e0ae174..162f96e 100644
--- a/CHANGES
+++ b/CHANGES
@@ -73,13 +73,41 @@ Version 3.0.0a1
pp.__diag__.enable_all_warnings()
-- New namespace, assert methods and classes added to support writing unit tests.
+- New namespace, assert methods and classes added to support writing
+ unit tests.
- assertParseResultsEquals
- assertParseAndCheckList
- assertParseAndCheckDict
- assertRunTestResults
- assertRaisesParseException
- - reset_pyparsing_context context manager, to restore pyparsing config settings
+ - reset_pyparsing_context context manager, to restore pyparsing
+ config settings
+
+- Enhanced the Regex class to accept patterns compiled with the
+ re-compatible regex module. Individual expressions can be built from
+ regex-compiled patterns using:
+
+ import pyparsing as pp
+ import regex
+
+ # would use regex for this expression
+ integer_parser = pp.Regex(regex.compile(r'\d+'))
+
+ You can also replace the use of the re module as it is used internally
+ by pyparsing in a number of classes by overwriting pyparsing's imported
+ re symbol:
+
+ import pyparsing as pp
+ import regex
+ pp.re = regex # redirects all internal re usage in pyparsing to regex
+
+ # would now use regex instead of re to compile this string
+ integer_parser = pp.Regex(r'\d+')
+
+ # would also now use regex internally instead of re
+ integer_parser = pp.Word(pp.nums)
+
+ Inspired by PR submitted by bjrnfrdnnd on GitHub, very nice!
- Fixed handling of ParseSyntaxExceptions raised as part of Each
expressions, when sub-expressions contain '-' backtrack
diff --git a/pyparsing.py b/pyparsing.py
index 6c3c7ec..48720f0 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -96,7 +96,7 @@ classes inherit from. Use the docstrings for examples of how to:
"""
__version__ = "3.0.0a1"
-__versionTime__ = "13 Oct 2019 05:28 UTC"
+__versionTime__ = "13 Oct 2019 05:49 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -219,7 +219,7 @@ __all__ = ['__version__', '__versionTime__', '__author__', '__compat__', '__diag
'stringStart', 'traceParseAction', 'unicodeString', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', 'locatedExpr', 'withClass',
'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set',
- 'conditionAsParseAction', 'pyparsing_test',
+ 'conditionAsParseAction', 'pyparsing_test', 're',
]
system_version = tuple(sys.version_info)[:3]
@@ -3065,14 +3065,32 @@ class Regex(Token):
If the given regex contains named groups (defined using ``(?P<name>...)``),
these will be preserved as named parse results.
+ To use a different RE module (such as the `regex` module) in place of the
+ Python stdlib re module, either build your Regex object from a pattern
+ compiled with that module, or replace the `re` module imported by pyparsing
+ with the `regex` module (see the examples below).
+
+
Example::
realnum = Regex(r"[+-]?\d+\.\d*")
date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
# ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
+
+ import pyparsing as pp
+ import regex
+ parser = pp.Regex(regex.compile(r'[0-9]'))
+
+ # or
+
+ pp.re = regex
+
+ # both of these will use the regex module to compile their internal re's
+ parser = pp.Regex(r'[0-9]')
+ parser = pp.Word(pp.nums)
+
"""
- compiledREtype = type(re.compile("[A-Z]"))
def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False):
"""The parameters ``pattern`` and ``flags`` are passed
to the ``re.compile()`` function as-is. See the Python
@@ -3097,13 +3115,13 @@ class Regex(Token):
SyntaxWarning, stacklevel=2)
raise
- elif isinstance(pattern, Regex.compiledREtype):
+ elif hasattr(pattern, 'pattern') and hasattr(pattern, 'match'):
self.re = pattern
- self.pattern = self.reString = str(pattern)
+ self.pattern = self.reString = pattern.pattern
self.flags = flags
else:
- raise ValueError("Regex may only be constructed with a string or a compiled RE object")
+ raise TypeError("Regex may only be constructed with a string or a compiled RE object")
self.re_match = self.re.match