Add support for combining unicode_sets using multiple inheritance

author: Paul McGuire <ptmcg@austin.rr.com> 2018-11-17 19:09:54 -0600
committer: Paul McGuire <ptmcg@austin.rr.com> 2018-11-17 19:09:54 -0600
commit: 830e5cfcea1dc4bc592f1e61d788b6eccc6052c6 (patch)
tree: e3d0bc378de8a1f34f711446c426c71f91c9e563
parent: 905b01bece3a17e952cb11eaa8c9d2d272b27569 (diff)
download: pyparsing-git-830e5cfcea1dc4bc592f1e61d788b6eccc6052c6.tar.gz
3 files changed, 24 insertions, 10 deletions
diff --git a/CHANGES b/CHANGES
index b9ae519..585166b 100644
--- a/CHANGES
+++ b/CHANGES
@@ -6,6 +6,14 @@ Version 2.3.1 -
 ---------------
 - Added unicode sets to pyparsing_unicode for Latin-A and Latin-B ranges.
 
+- Added ability to define custom unicode sets as combinations of other sets
+  using multiple inheritance.
+
+    class Turkish_set(pp.pyparsing_unicode.Latin1, pp.pyparsing_unicode.LatinA):
+        pass
+
+    turkish_word = pp.Word(Turkish_set.alphas)
+
 
 Version 2.3.0 - October, 2018
 -----------------------------
diff --git a/pyparsing.py b/pyparsing.py
index 19b0141..d7be7b4 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -91,6 +91,7 @@ import pprint
 import traceback
 import types
 from datetime import datetime
+from itertools import takewhile
 try:
     # Python 3
     from itertools import filterfalse
@@ -5841,20 +5842,23 @@ class _lazyclassproperty(object):
         return ret
 
 
-class unicode_set:
+class unicode_set(object):
     _ranges = []
 
     @_lazyclassproperty
     def printables(cls):
-        return ''.join(filterfalse(unicode.isspace, (unichr(c) for r in cls._ranges for c in range(r[0], r[-1] + 1))))
+        ranges = set(sum((cc._ranges for cc in takewhile(lambda x: x is not unicode_set, cls.__mro__)), []))
+        return ''.join(filterfalse(unicode.isspace, (unichr(c) for r in ranges for c in range(r[0], r[-1] + 1))))
 
     @_lazyclassproperty
     def alphas(cls):
-        return ''.join(filter(unicode.isalpha, (unichr(c) for r in cls._ranges for c in range(r[0], r[-1] + 1))))
+        ranges = set(sum((cc._ranges for cc in takewhile(lambda x: x is not unicode_set, cls.__mro__)), []))
+        return ''.join(filter(unicode.isalpha, (unichr(c) for r in ranges for c in range(r[0], r[-1] + 1))))
 
     @_lazyclassproperty
     def nums(cls):
-        return ''.join(filter(unicode.isdigit, (unichr(c) for r in cls._ranges for c in range(r[0], r[-1] + 1))))
+        ranges = set(sum((cc._ranges for cc in takewhile(lambda x: x is not unicode_set, cls.__mro__)), []))
+        return ''.join(filter(unicode.isdigit, (unichr(c) for r in ranges for c in range(r[0], r[-1] + 1))))
 
     @_lazyclassproperty
     def alphanums(cls):
@@ -5901,9 +5905,8 @@ class pyparsing_unicode(unicode_set):
     class Korean(unicode_set):
         _ranges = [(0xac00, 0xd7af), (0x1100, 0x11ff), (0x3130, 0x318f), (0xa960, 0xa97f), (0xd7b0, 0xd7ff), ]
 
-    class CJK(unicode_set):
-        _ranges = [  # sum of Chinese, Japanese, and Korean ranges
-        ]
+    class CJK(Chinese, Japanese, Korean):
+        pass
 
     class Thai(unicode_set):
         _ranges = [(0x0e01, 0x0e3a), (0x0e3f, 0x0e5b), ]
@@ -5918,7 +5921,6 @@ class pyparsing_unicode(unicode_set):
         _ranges = [(0x0900, 0x097f), (0xa8e0, 0xa8ff)]
 
 pyparsing_unicode.Japanese._ranges = pyparsing_unicode.Japanese.Kanji._ranges + pyparsing_unicode.Japanese.Hiragana._ranges + pyparsing_unicode.Japanese.Katakana._ranges
-pyparsing_unicode.CJK._ranges = pyparsing_unicode.Chinese._ranges + pyparsing_unicode.Japanese._ranges + pyparsing_unicode.Korean._ranges
 
 # define ranges in language character sets
 if PY_3:
diff --git a/unitTests.py b/unitTests.py
index 5cb293d..2aff419 100644
--- a/unitTests.py
+++ b/unitTests.py
@@ -3650,8 +3650,9 @@ class UnicodeTests(ParseTestCase):
         self.assertTrue(result.asList() == [u'Καλημέρα', ',', u'κόσμε', '!'],
                         "Failed to parse Greek 'Hello, World!' using pyparsing_unicode.Greek.alphas")
 
-        class Turkish_set(pp.unicode_set):
-            _ranges = pp.pyparsing_unicode.Latin1._ranges + pp.pyparsing_unicode.LatinA._ranges
+        # define a custom unicode range using multiple inheritance
+        class Turkish_set(pp.pyparsing_unicode.Latin1, pp.pyparsing_unicode.LatinA):
+            pass
 
         key = pp.Word(Turkish_set.alphas)
         value = pp.pyparsing_common.integer | pp.Word(Turkish_set.alphas, Turkish_set.alphanums)
@@ -3667,6 +3668,9 @@ class UnicodeTests(ParseTestCase):
         print(result.asDict())
         self.assertEqual(result.asDict(), {'şehir': 'İzmir', 'ülke': 'Türkiye', 'nüfus': 4279677},
                          "Failed to parse Turkish key-value pairs")
+        self.assertEqual(len(pp.pyparsing_unicode.CJK.printables), 53760,
+                         "failed to construct ranges by merging Chinese, Japanese and Korean")
+        self.assertEqual(len(Turkish_set.printables), 317, "failed to construct ranges by merging Latin1 and LatinA")
 
 class IndentedBlockTest(ParseTestCase):
     # parse pseudo-yaml indented text
author	Paul McGuire <ptmcg@austin.rr.com>	2018-11-17 19:09:54 -0600
committer	Paul McGuire <ptmcg@austin.rr.com>	2018-11-17 19:09:54 -0600
commit	830e5cfcea1dc4bc592f1e61d788b6eccc6052c6 (patch)
tree	e3d0bc378de8a1f34f711446c426c71f91c9e563
parent	905b01bece3a17e952cb11eaa8c9d2d272b27569 (diff)
download	pyparsing-git-830e5cfcea1dc4bc592f1e61d788b6eccc6052c6.tar.gz