summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tim Hatch <tim@timhatch.com> 2016-06-01 09:54:03 -0700
committer: Tim Hatch <tim@timhatch.com> 2016-06-01 09:54:03 -0700
commit: d001c12058bfe796e618bbf31e33ca8880edaab4 (patch)
tree: fbbd35784183950dd99f6e4bc1821031a8c0ea6f
parent: d37944b139baa4151ee3dc0dc89dde0bc6ce83a2 (diff)
download: pygments-d001c12058bfe796e618bbf31e33ca8880edaab4.tar.gz
Fix a bug in default grouping in regexopt when the words are all the same length.
Fixes #1229
-rw-r--r-- pygments/regexopt.py 2
-rw-r--r-- tests/test_regexopt.py 34
2 files changed, 35 insertions, 1 deletions
diff --git a/pygments/regexopt.py b/pygments/regexopt.py
index 79903684..047c703f 100644
--- a/pygments/regexopt.py
+++ b/pygments/regexopt.py
@@ -54,7 +54,7 @@ def regex_opt_inner(strings, open_paren):
return open_paren + regex_opt_inner(rest, '') + '|' \
+ make_charset(oneletter) + close_paren
# print '-> only 1-character'
- return make_charset(oneletter)
+ return open_paren + make_charset(oneletter) + close_paren
prefix = commonprefix(strings)
if prefix:
plen = len(prefix)
diff --git a/tests/test_regexopt.py b/tests/test_regexopt.py
index dd56a446..6322c735 100644
--- a/tests/test_regexopt.py
+++ b/tests/test_regexopt.py
@@ -46,6 +46,7 @@ class RegexOptTestCase(unittest.TestCase):
random.randint(1, len(kwlist) - 1))
no_match = set(kwlist) - set(to_match)
rex = re.compile(regex_opt(to_match))
+ self.assertEqual(rex.groups, 1)
for w in to_match:
self.assertTrue(rex.match(w))
for w in no_match:
@@ -74,3 +75,36 @@ class RegexOptTestCase(unittest.TestCase):
rex = re.compile(opt)
m = rex.match('abfoo')
self.assertEqual(5, m.end())
+
+ def test_different_length_grouping(self):
+ opt = regex_opt(('a', 'xyz'))
+ print(opt)
+ rex = re.compile(opt)
+ self.assertTrue(rex.match('a'))
+ self.assertTrue(rex.match('xyz'))
+ self.assertFalse(rex.match('b'))
+ self.assertEqual(1, rex.groups)
+
+ def test_same_length_grouping(self):
+ opt = regex_opt(('a', 'b'))
+ print(opt)
+ rex = re.compile(opt)
+ self.assertTrue(rex.match('a'))
+ self.assertTrue(rex.match('b'))
+ self.assertFalse(rex.match('x'))
+
+ self.assertEqual(1, rex.groups)
+ groups = rex.match('a').groups()
+ self.assertEqual(('a',), groups)
+
+ def test_same_length_suffix_grouping(self):
+ opt = regex_opt(('a', 'b'), suffix='(m)')
+ print(opt)
+ rex = re.compile(opt)
+ self.assertTrue(rex.match('am'))
+ self.assertTrue(rex.match('bm'))
+ self.assertFalse(rex.match('xm'))
+ self.assertFalse(rex.match('ax'))
+ self.assertEqual(2, rex.groups)
+ groups = rex.match('am').groups()
+ self.assertEqual(('a', 'm'), groups)