summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ptmcg <ptmcg@austin.rr.com> 2018-11-09 23:00:19 -0600
committer: ptmcg <ptmcg@austin.rr.com> 2018-11-09 23:00:19 -0600
commit: 3189e42fcec49b0cd2d7ba44ff7d4b6e38032e8e (patch)
tree: af83aa213f0ddae38c7cf684d4ee2aa26bbf2b74
parent: 29c79874e135b93128f821765d11a401bfb4ac1e (diff)
download: pyparsing-git-3189e42fcec49b0cd2d7ba44ff7d4b6e38032e8e.tar.gz
Added unicode sets for LatinA and LatinB ranges
-rw-r--r--  CHANGES       5
-rw-r--r--  pyparsing.py  14
-rw-r--r--  unitTests.py  19
3 files changed, 33 insertions, 5 deletions
diff --git a/CHANGES b/CHANGES
index e96fee0..b9ae519 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,6 +2,11 @@
Change Log
==========
+Version 2.3.1 -
+---------------
+- Added unicode sets to pyparsing_unicode for Latin-A and Latin-B ranges.
+
+
Version 2.3.0 - October, 2018
-----------------------------
- NEW SUPPORT FOR UNICODE CHARACTER RANGES
diff --git a/pyparsing.py b/pyparsing.py
index 5006e4e..19b0141 100644
--- a/pyparsing.py
+++ b/pyparsing.py
@@ -75,8 +75,8 @@ classes inherit from. Use the docstrings for examples of how to:
- find more useful common expressions in the L{pyparsing_common} namespace class
"""
-__version__ = "2.3.0"
-__versionTime__ = "28 Oct 2018 01:57 UTC"
+__version__ = "2.3.1"
+__versionTime__ = "10 Nov 2018 04:38 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
import string
@@ -5865,9 +5865,13 @@ class pyparsing_unicode(unicode_set):
_ranges = [(32, sys.maxunicode)]
class Latin1(unicode_set):
- _ranges = [
- (0x0020, 0x007e), (0x00a0, 0x00ff),
- ]
+ _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]
+
+ class LatinA(unicode_set):
+ _ranges = [(0x0100, 0x017f),]
+
+ class LatinB(unicode_set):
+ _ranges = [(0x0180, 0x024f),]
class Greek(unicode_set):
_ranges = [
diff --git a/unitTests.py b/unitTests.py
index e54d68b..e4f2c63 100644
--- a/unitTests.py
+++ b/unitTests.py
@@ -3632,6 +3632,24 @@ class UnicodeTests(ParseTestCase):
self.assertTrue(result.asList() == [u'Καλημέρα', ',', u'κόσμε', '!'],
"Failed to parse Greek 'Hello, World!' using pyparsing_unicode.Greek.alphas")
+ class Turkish_set(pp.unicode_set):
+ _ranges = pp.pyparsing_unicode.Latin1._ranges + pp.pyparsing_unicode.LatinA._ranges
+
+ key = pp.Word(Turkish_set.alphas)
+ value = pp.pyparsing_common.integer | pp.Word(Turkish_set.alphas, Turkish_set.alphanums)
+ EQ = pp.Suppress('=')
+ key_value = key + EQ + value
+
+ sample = """\
+ şehir=İzmir
+ ülke=Türkiye
+ nüfus=4279677"""
+ result = pp.Dict(pp.OneOrMore(pp.Group(key_value))).parseString(sample)
+
+ print(result.asDict())
+ self.assertEqual(result.asDict(), {'şehir': 'İzmir', 'ülke': 'Türkiye', 'nüfus': 4279677},
+ "Failed to parse Turkish key-value pairs")
+
class IndentedBlockTest(ParseTestCase):
# parse pseudo-yaml indented text
def runTest(self):
@@ -3914,6 +3932,7 @@ if __name__ == '__main__':
# run specific tests by including them in this list, otherwise
# all tests will be run
testclasses = [
+ UnicodeTests
]
if not testclasses: