summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ptmcg <ptmcg@austin.rr.com> 2022-11-06 16:47:20 -0600
committer: ptmcg <ptmcg@austin.rr.com> 2022-11-06 16:47:20 -0600
commit: 6aef782b6d347db134bafa66004c60b42e42e427 (patch)
tree: 8fb8b4e455a3671f4320e8cf3b85a46d83ed18e4
parent: 2c648a1acdd0fc6691b682dff1a23ce9b4005090 (diff)
download: pyparsing-git-6aef782b6d347db134bafa66004c60b42e42e427.tar.gz
Add Unicode MIDDLE DOT to Latin1.identbodychars
-rw-r--r-- CHANGES 2
-rw-r--r-- pyparsing/unicode.py 4
2 files changed, 4 insertions, 2 deletions
diff --git a/CHANGES b/CHANGES
index 413f9e1..471fac1 100644
--- a/CHANGES
+++ b/CHANGES
@@ -60,6 +60,8 @@ help from Devin J. Pohly in structuring the code to enable this peaceful transit
debug flag on an expression and all of its sub-expressions. Requested
by multimeric in Issue #399.
+- Added '·' (Unicode MIDDLE DOT) to the set of Latin1.identbodychars.
+
- Fixed bug in `Word` when `max=2`. Also added performance enhancement
when specifying `exact` argument. Reported in issue #409 by
panda-34, nice catch!
diff --git a/pyparsing/unicode.py b/pyparsing/unicode.py
index c1d469b..9bc5e1d 100644
--- a/pyparsing/unicode.py
+++ b/pyparsing/unicode.py
@@ -100,13 +100,13 @@ class unicode_set:
def identbodychars(cls):
"""
all characters in this range that are valid identifier body characters,
- plus the digits 0-9
+ plus the digits 0-9, and · (Unicode MIDDLE DOT)
"""
return "".join(
sorted(
set(
cls.identchars
- + "0123456789"
+ + "0123456789·"
+ "".join(
[c for c in cls._chars_for_ranges if ("_" + c).isidentifier()]
)