diff options
author | ptmcg <ptmcg@austin.rr.com> | 2021-10-26 15:53:53 -0500 |
---|---|---|
committer | ptmcg <ptmcg@austin.rr.com> | 2021-10-26 15:53:53 -0500 |
commit | ddfcd6b656b8ee11517126348fa17d26332cc6cc (patch) | |
tree | 5642f49e0e2098eaaf30c39297444172cf6c1b11 | |
parent | ac7d5a9f3c7a4ff5f617f7488944aaad63c4e47f (diff) | |
download | pyparsing-git-ddfcd6b656b8ee11517126348fa17d26332cc6cc.tar.gz |
Fix one_of to generate regex internally, even if caseless or as_keyword given as True
-rw-r--r-- | CHANGES | 25 | ||||
-rw-r--r-- | pyparsing/__init__.py | 2 | ||||
-rw-r--r-- | pyparsing/helpers.py | 38 |
3 files changed, 43 insertions, 22 deletions
@@ -2,33 +2,40 @@ Change Log ========== +Version 3.0.2 - +--------------- +- Performance enhancement to `one_of` to always generate `regex`, even + if `caseless` or `as_keyword` args are given as `True` (unless explicitly + disabled by passing `use_regex=False`). + + Version 3.0.1 - --------------- -- Fixed bug where Word(max=n) did not match word groups less than length 'n'. +- Fixed bug where `Word(max=n)` did not match word groups less than length 'n'. Thanks to Joachim Metz for catching this! -- Fixed bug where ParseResults accidentally created recursive contents. +- Fixed bug where `ParseResults` accidentally created recursive contents. Joachim Metz on this one also! -- Fixed bug where warn_on_multiple_string_args_to_oneof warning is raised +- Fixed bug where `warn_on_multiple_string_args_to_oneof` warning is raised even when not enabled. Version 3.0.0 - --------------- - A consolidated list of all the changes in the 3.0.0 release can be found in - docs/whats_new_in_3_0_0.rst. + `docs/whats_new_in_3_0_0.rst`. (https://github.com/pyparsing/pyparsing/blob/master/docs/whats_new_in_3_0_0.rst) Version 3.0.0.final - --------------------- -- Added support for python -W warning option to call enable_all_warnings() at startup. - Also detects setting of PYPARSINGENABLEALLWARNINGS environment variable to any non-blank +- Added support for python `-W` warning option to call `enable_all_warnings()` at startup. + Also detects setting of `PYPARSINGENABLEALLWARNINGS` environment variable to any non-blank value. - Fixed named results returned by `url` to match fields as they would be parsed - using urllib.parse.urlparse. + using `urllib.parse.urlparse`. - Early response to `with_line_numbers` was positive, with some requested enhancements: . added a trailing "|" at the end of each line (to show presence of trailing spaces); @@ -40,8 +47,8 @@ Version 3.0.0.final - . added mark_control argument to support highlighting of control characters using '.' 
or Unicode symbols, such as "␍" and "␊". -- Modified helpers common_html_entity and replace_html_entity() to use the HTML - entity definitions from html.entities.html5. +- Modified helpers `common_html_entity` and `replace_html_entity()` to use the HTML + entity definitions from `html.entities.html5`. - Updated the class diagram in the pyparsing docs directory, along with the supporting .puml file (PlantUML markup) used to create the diagram. diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index b1486ab..bf0fe81 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -105,7 +105,7 @@ __version__ = "{}.{}.{}".format(*__version_info__[:3]) + ( ), "", )[__version_info__.release_level == "final"] -__version_time__ = "25 October 2021 15:10 UTC" +__version_time__ = "26 October 2021 20:39 UTC" __versionTime__ = __version_time__ __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>" diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py index 2d112de..0dde451 100644 --- a/pyparsing/helpers.py +++ b/pyparsing/helpers.py @@ -1,5 +1,6 @@ # helpers.py import html.entities +import re from . 
import __diag__ from .core import * @@ -253,9 +254,9 @@ def one_of( if not symbols: return NoMatch() - if not asKeyword: - # if not producing keywords, need to reorder to take care to avoid masking - # longer choices with shorter ones + # reorder given symbols to take care to avoid masking longer choices with shorter ones + # (but only if the given symbols are not just single characters) + if any(len(sym) > 1 for sym in symbols): i = 0 while i < len(symbols) - 1: cur = symbols[i] @@ -270,17 +271,30 @@ def one_of( else: i += 1 - if not (caseless or asKeyword) and useRegex: - # ~ print(strs, "->", "|".join([_escapeRegexChars(sym) for sym in symbols])) + if useRegex: + re_flags: int = re.IGNORECASE if caseless else 0 + try: - if len(symbols) == len("".join(symbols)): - return Regex( - "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) - ).set_name(" | ".join(symbols)) + if all(len(sym) == 1 for sym in symbols): + # symbols are just single characters, create range regex pattern + patt = "[{}]".format("".join(_escapeRegexRangeChars(sym) for sym in symbols)) else: - return Regex("|".join(re.escape(sym) for sym in symbols)).set_name( - " | ".join(symbols) - ) + patt = "|".join(re.escape(sym) for sym in symbols) + + # wrap with \b word break markers if defining as keywords + if asKeyword: + patt = r"\b(:?{})\b".format(patt) + + ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols)) + + if caseless: + # add parse action to return symbols as specified, not in random + # casing as found in input string + symbol_map = {sym.lower(): sym for sym in symbols} + ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()]) + + return ret + except sre_constants.error: warnings.warn( "Exception creating Regex for one_of, building MatchFirst", stacklevel=2 |