summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorptmcg <ptmcg@austin.rr.com>2021-10-26 15:53:53 -0500
committerptmcg <ptmcg@austin.rr.com>2021-10-26 15:53:53 -0500
commitddfcd6b656b8ee11517126348fa17d26332cc6cc (patch)
tree5642f49e0e2098eaaf30c39297444172cf6c1b11
parentac7d5a9f3c7a4ff5f617f7488944aaad63c4e47f (diff)
downloadpyparsing-git-ddfcd6b656b8ee11517126348fa17d26332cc6cc.tar.gz
Fix one_of to generate regex internally, even if caseless or as_keyword given as True
-rw-r--r--CHANGES25
-rw-r--r--pyparsing/__init__.py2
-rw-r--r--pyparsing/helpers.py38
3 files changed, 43 insertions, 22 deletions
diff --git a/CHANGES b/CHANGES
index a1f1d37..e80bead 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,33 +2,40 @@
Change Log
==========
+Version 3.0.2 -
+---------------
+- Performance enhancement to `one_of` to always generate `regex`, even
+ if `caseless` or `as_keyword` args are given as `True` (unless explicitly
+ disabled by passing `use_regex=False`).
+
+
Version 3.0.1 -
---------------
-- Fixed bug where Word(max=n) did not match word groups less than length 'n'.
+- Fixed bug where `Word(max=n)` did not match word groups less than length 'n'.
Thanks to Joachim Metz for catching this!
-- Fixed bug where ParseResults accidentally created recursive contents.
+- Fixed bug where `ParseResults` accidentally created recursive contents.
Joachim Metz on this one also!
-- Fixed bug where warn_on_multiple_string_args_to_oneof warning is raised
+- Fixed bug where `warn_on_multiple_string_args_to_oneof` warning is raised
even when not enabled.
Version 3.0.0 -
---------------
- A consolidated list of all the changes in the 3.0.0 release can be found in
- docs/whats_new_in_3_0_0.rst.
+ `docs/whats_new_in_3_0_0.rst`.
(https://github.com/pyparsing/pyparsing/blob/master/docs/whats_new_in_3_0_0.rst)
Version 3.0.0.final -
---------------------
-- Added support for python -W warning option to call enable_all_warnings() at startup.
- Also detects setting of PYPARSINGENABLEALLWARNINGS environment variable to any non-blank
+- Added support for python `-W` warning option to call `enable_all_warnings()` at startup.
+ Also detects setting of `PYPARSINGENABLEALLWARNINGS` environment variable to any non-blank
value.
- Fixed named results returned by `url` to match fields as they would be parsed
- using urllib.parse.urlparse.
+ using `urllib.parse.urlparse`.
- Early response to `with_line_numbers` was positive, with some requested enhancements:
. added a trailing "|" at the end of each line (to show presence of trailing spaces);
@@ -40,8 +47,8 @@ Version 3.0.0.final -
. added mark_control argument to support highlighting of control characters using
'.' or Unicode symbols, such as "␍" and "␊".
-- Modified helpers common_html_entity and replace_html_entity() to use the HTML
- entity definitions from html.entities.html5.
+- Modified helpers `common_html_entity` and `replace_html_entity()` to use the HTML
+ entity definitions from `html.entities.html5`.
- Updated the class diagram in the pyparsing docs directory, along with the supporting
.puml file (PlantUML markup) used to create the diagram.
diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py
index b1486ab..bf0fe81 100644
--- a/pyparsing/__init__.py
+++ b/pyparsing/__init__.py
@@ -105,7 +105,7 @@ __version__ = "{}.{}.{}".format(*__version_info__[:3]) + (
),
"",
)[__version_info__.release_level == "final"]
-__version_time__ = "25 October 2021 15:10 UTC"
+__version_time__ = "26 October 2021 20:39 UTC"
__versionTime__ = __version_time__
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py
index 2d112de..0dde451 100644
--- a/pyparsing/helpers.py
+++ b/pyparsing/helpers.py
@@ -1,5 +1,6 @@
# helpers.py
import html.entities
+import re
from . import __diag__
from .core import *
@@ -253,9 +254,9 @@ def one_of(
if not symbols:
return NoMatch()
- if not asKeyword:
- # if not producing keywords, need to reorder to take care to avoid masking
- # longer choices with shorter ones
+ # reorder given symbols to take care to avoid masking longer choices with shorter ones
+ # (but only if the given symbols are not just single characters)
+ if any(len(sym) > 1 for sym in symbols):
i = 0
while i < len(symbols) - 1:
cur = symbols[i]
@@ -270,17 +271,30 @@ def one_of(
else:
i += 1
- if not (caseless or asKeyword) and useRegex:
- # ~ print(strs, "->", "|".join([_escapeRegexChars(sym) for sym in symbols]))
+ if useRegex:
+ re_flags: int = re.IGNORECASE if caseless else 0
+
try:
- if len(symbols) == len("".join(symbols)):
- return Regex(
- "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)
- ).set_name(" | ".join(symbols))
+ if all(len(sym) == 1 for sym in symbols):
+ # symbols are just single characters, create range regex pattern
+ patt = "[{}]".format("".join(_escapeRegexRangeChars(sym) for sym in symbols))
else:
- return Regex("|".join(re.escape(sym) for sym in symbols)).set_name(
- " | ".join(symbols)
- )
+ patt = "|".join(re.escape(sym) for sym in symbols)
+
+ # wrap with \b word break markers if defining as keywords
+ if asKeyword:
+ patt = r"\b(:?{})\b".format(patt)
+
+ ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))
+
+ if caseless:
+ # add parse action to return symbols as specified, not in random
+ # casing as found in input string
+ symbol_map = {sym.lower(): sym for sym in symbols}
+ ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])
+
+ return ret
+
except sre_constants.error:
warnings.warn(
"Exception creating Regex for one_of, building MatchFirst", stacklevel=2