From b62311f1bcffda936eab4173e2d94f4cd6f75a1f Mon Sep 17 00:00:00 2001
From: Matthew Peveler <matt.peveler@gmail.com>
Date: Sun, 22 May 2022 11:46:25 -0400
Subject: Add future-compat attribute parsing (#255)

---
 asciidoc/attrs.py   |  77 +++++++++++++++++++++--
 tests/conftest.py   |   9 +++
 tests/test_attrs.py | 178 ++++++++++++++++++++++++++++++++--------------------
 3 files changed, 193 insertions(+), 71 deletions(-)
 create mode 100644 tests/conftest.py

diff --git a/asciidoc/attrs.py b/asciidoc/attrs.py
index f61b62b..42a6aae 100644
--- a/asciidoc/attrs.py
+++ b/asciidoc/attrs.py
@@ -1,6 +1,7 @@
 import re
 import typing
 
+from . import get_compat_mode
 from .utils import get_args, get_kwargs
 
 
@@ -15,16 +16,85 @@ def parse_attributes(attrs: str, output_dict: typing.Dict) -> None:
     output_dict: {}
 
     attrs: 'hello,world'
-    output_dict: {'2': 'world', '0': 'hello,world', '1': 'hello'}
+    output_dict: {'0': 'hello,world', '1': 'hello', '2': 'world'}
 
     attrs: '"hello", planet="earth"'
-    output_dict: {'planet': 'earth', '0': '"hello", planet="earth"', '1': 'hello'}
+    output_dict: {'0': '"hello", planet="earth"', '1': 'hello', 'planet': 'earth'}
     """
     if not attrs:
         return
     output_dict['0'] = attrs
     # Replace line separators with spaces so line spanning works.
     s = re.sub(r'\s', ' ', attrs)
+    d = legacy_parse(s) if get_compat_mode() == 1 else future_parse(s)
+    output_dict.update(d)
+    assert len(d) > 0
+
+
+def future_parse(s: str) -> dict:
+    """Parse an attribute list string using the future-compat rules.
+
+    Entries are split on commas outside quotes. ``name=value`` entries are
+    keyed by name, all others by their 1-based position (named entries
+    still consume a position). Values stay strings: an empty unquoted
+    positional entry becomes None, an empty named value becomes ''.
+    An empty ``s`` yields {} rather than raising IndexError.
+    """
+    d = {}
+    key = value = ''
+    count = 1
+    quote = None
+    in_quotes = had_quotes = False
+
+    def add_value():
+        """Store the pending entry in ``d`` and reset the accumulators."""
+        nonlocal count, key, value
+        key = key.strip()
+        value = value.strip()
+        if had_quotes:
+            value = value[1:-1]  # drop the surrounding quote characters
+        elif not value:
+            value = None  # unquoted and empty: placeholder entry
+        if key:
+            d[key] = value or ''
+            key = ''
+        else:
+            d[str(count)] = value
+        count += 1
+        value = ''
+
+    for i, char in enumerate(s):
+        escaped = i > 0 and s[i - 1] == '\\'
+        if char == ',' and not in_quotes:
+            add_value()
+            had_quotes = False
+        elif value and char == '=' and not in_quotes:
+            key = value
+            value = ''
+        elif not in_quotes and char in ('"', "'") and not escaped:
+            in_quotes = True
+            quote = char
+            value += char
+        elif in_quotes and char == quote and not escaped:
+            in_quotes = False
+            had_quotes = True
+            quote = None
+            value += char
+        elif char != '\\' or s[i + 1:i + 2] not in ('"', "'"):
+            # A backslash that escapes a quote is dropped; all else is kept.
+            value += char
+
+    if key and key[0] == '=' and not value:
+        value = key + "="  # '=foo=' is a positional value, not a key
+        key = ""
+    if not value and s.rstrip().endswith(','):
+        value = ' '  # trailing comma: force one final empty entry
+    if had_quotes or value or key:
+        add_value()
+    return d
+
+
+def legacy_parse(s: str) -> dict:
     d = {}
     try:
         d.update(get_args(s))
@@ -47,5 +117,4 @@ def parse_attributes(attrs: str, output_dict: typing.Dict) -> None:
         for k in list(d.keys()):  # Drop any empty positional arguments.
             if d[k] == '':
                 del d[k]
-    output_dict.update(d)
-    assert len(d) > 0
+    return d
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..2091d6f
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,9 @@
+from asciidoc import set_future_compat, set_legacy_compat
+import pytest
+
+
+@pytest.fixture
+def enable_future_compat() -> None:  # generator fixture; yields no value
+    set_future_compat()  # setup: presumably selects future compat mode
+    yield  # the requesting test runs at this point
+    set_legacy_compat()  # teardown: resets to legacy, not the prior mode
diff --git a/tests/test_attrs.py b/tests/test_attrs.py
index 9991f7c..c3b90c6 100644
--- a/tests/test_attrs.py
+++ b/tests/test_attrs.py
@@ -2,9 +2,31 @@ from asciidoc import attrs
 import pytest
 
 
-@pytest.mark.parametrize(
-    "input,expected",
-    (
+testcases = {
+    # these test cases fail under future mode
+    "pure_legacy": (
+        # In future mode, all values are always strings
+        (
+            'height=100,caption="",link="images/octocat.png"',
+            {
+                '0': 'height=100,caption="",link="images/octocat.png"',
+                'height': 100,
+                'caption': '',
+                'link': 'images/octocat.png',
+            },
+        ),
+        (
+            "height=100,caption='',link='images/octocat.png'",
+            {
+                '0': "height=100,caption='',link='images/octocat.png'",
+                'height': 100,
+                'caption': '',
+                'link': 'images/octocat.png',
+            },
+        ),
+    ),
+    # these test cases pass under both legacy and future modes
+    "legacy": (
         # docstring tests
         ('', {}),
         ('hello,world', {'0': 'hello,world', '1': 'hello', '2': 'world'}),
@@ -14,19 +36,13 @@ import pytest
         ),
         # tests taken from
         # https://github.com/asciidoctor/asciidoctor/blob/main/test/attribute_list_test.rb
-        # commented out tests are currently supported by asciidoc.py
         ('quote', {'0': 'quote', '1': 'quote'}),
         ('"quote"', {'0': '"quote"', '1': 'quote'}),
         ('""', {'0': '""', '1': ''}),
-        # ('"ba\"zaar"', {'0': '"ba\"zaar"', '1': 'ba"zaar'}),
         ("'quote'", {'0': "'quote'", '1': 'quote'}),
         ("''", {'0': "''", '1': ''}),
         ('\'', {'0': '\'', '1': '\''}),
-        # ('name=\'', {'0': 'name=\'', 'name': '\''}),
-        # ('name=\'{val}', {'0': 'name=\'{val}', 'name': '\'{val}'}),
         ('\'ba\\\'zaar\'', {'0': '\'ba\\\'zaar\'', '1': 'ba\'zaar'}),
-        # ('quote , ', {'0': 'quote , ', '1': 'quote', '2': None}),
-        # (', John Smith', {'0': ', John Smith', '1': None, '2': 'John Smith'}),
         (
             'first, second one, third',
             {
@@ -35,85 +51,113 @@ import pytest
                 '2': 'second one', '3': 'third',
             },
         ),
-        # (
-        #     'first,,third,',
-        #     {'0': 'first,,third,', '1': 'first', '2': None, '3': 'third', '4': None}
-        # ),
         ('=foo=', {'0': '=foo=', '1': '=foo='}),
-        # ('foo=bar', {'0': 'foo=bar', 'foo': 'bar'}),
         ('foo="bar"', {'0': 'foo="bar"', 'foo': 'bar'}),
+
+        ('foo=\'bar\'', {'0': 'foo=\'bar\'', 'foo': 'bar'}),
+
+    ),
+    # these tests only pass under future mode
+    # tests taken from
+    # https://github.com/asciidoctor/asciidoctor/blob/main/test/attribute_list_test.rb
+    "future": (
+        ('"ba\\"zaar"', {'0': '"ba\\"zaar"', '1': 'ba"zaar'}),  # escaped "
+        ('name=\'', {'0': 'name=\'', 'name': '\''}),
+        ('name=\'{val}', {'0': 'name=\'{val}', 'name': '\'{val}'}),
+        ('quote , ', {'0': 'quote , ', '1': 'quote', '2': None}),
+        (', John Smith', {'0': ', John Smith', '1': None, '2': 'John Smith'}),
+        (
+            'first,,third,',
+            {'0': 'first,,third,', '1': 'first', '2': None, '3': 'third', '4': None}
+        ),
+        ('foo=bar', {'0': 'foo=bar', 'foo': 'bar'}),
+        ('foo=', {'0': 'foo=', 'foo': ''}),
+        ('foo=,bar=baz', {'0': 'foo=,bar=baz', 'foo': '', 'bar': 'baz'}),
         (
             'height=100,caption="",link="images/octocat.png"',
             {
                 '0': 'height=100,caption="",link="images/octocat.png"',
-                'height': 100,
+                'height': '100',
                 'caption': '',
                 'link': 'images/octocat.png',
             },
         ),
-        ('foo=\'bar\'', {'0': 'foo=\'bar\'', 'foo': 'bar'}),
         (
             "height=100,caption='',link='images/octocat.png'",
             {
                 '0': "height=100,caption='',link='images/octocat.png'",
-                'height': 100,
+                'height': '100',
                 'caption': '',
                 'link': 'images/octocat.png',
             },
         ),
-        # ('foo=', {'0': 'foo=', 'foo': ''}),
-        # ('foo=,bar=baz', {'0': 'foo=,bar=baz', 'foo': '', 'bar': 'baz'}),
-        # (
-        #     'first=value, second=two, third=3',
-        #     {
-        #         '0': 'first=value, second=two, third=3',
-        #         'first': 'value',
-        #         'second': 'two',
-        #         'third': '3',
-        #     },
-        # ),
-        # (
-        #     'first=\'value\', second="value two", third=three',
-        #     {
-        #         '0': 'first=\'value\', second="value two", third=three',
-        #         'first': 'value',
-        #         'second': 'value two',
-        #         'third': 'three',
-        #     },
-        # ),
-        # (
-        #     "     first    =     'value', second     =\"value two\"     , third=       three      ", # noqa: E501
-        #     {
-        #         '0': "     first    =     'value', second     =\"value two\"     , third=       three      ", # noqa: E501
-        #         'first': 'value',
-        #         'second': 'value two',
-        #         'third': 'three',
-        #     },
-        # ),
-        # (
-        #     'first, second="value two", third=three, Sherlock Holmes',
-        #     {
-        #         '0': 'first, second="value two", third=three, Sherlock Holmes',
-        #         '1': 'first',
-        #         'second': 'value two',
-        #         'third': 'three',
-        #         '4': 'Sherlock Holmes',
-        #     },
-        # ),
-        # (
-        #     'first,,third=,,fifth=five',
-        #     {
-        #         '0': 'first,,third=,,fifth=five',
-        #         '1': 'first',
-        #         '2': None,
-        #         'third': '',
-        #         '4': None,
-        #         'fifth': 'five',
-        #     },
-        # ),
+        (
+            'first=value, second=two, third=3',
+            {
+                '0': 'first=value, second=two, third=3',
+                'first': 'value',
+                'second': 'two',
+                'third': '3',
+            },
+        ),
+        (
+            'first=\'value\', second="value two", third=three',
+            {
+                '0': 'first=\'value\', second="value two", third=three',
+                'first': 'value',
+                'second': 'value two',
+                'third': 'three',
+            },
+        ),
+        (
+            "     first    =     'value', second     =\"value two\"     , third=       three      ",  # noqa: E501
+            {
+                '0': "     first    =     'value', second     =\"value two\"     , third=       three      ",  # noqa: E501
+                'first': 'value',
+                'second': 'value two',
+                'third': 'three',
+            },
+        ),
+        (
+            'first, second="value two", third=three, Sherlock Holmes',
+            {
+                '0': 'first, second="value two", third=three, Sherlock Holmes',
+                '1': 'first',
+                'second': 'value two',
+                'third': 'three',
+                '4': 'Sherlock Holmes',
+            },
+        ),
+        (
+            'first,,third=,,fifth=five',
+            {
+                '0': 'first,,third=,,fifth=five',
+                '1': 'first',
+                '2': None,
+                'third': '',
+                '4': None,
+                'fifth': 'five',
+            },
+        ),
     )
+}
+
+
+@pytest.mark.parametrize(
+    "input,expected",
+    testcases["legacy"] + testcases["pure_legacy"],  # legacy-mode cases
 )
 def test_parse_attributes(input, expected):
     output = dict()
     attrs.parse_attributes(input, output)
     assert output == expected
+
+
+@pytest.mark.parametrize(
+    "input,expected", testcases['legacy'] + testcases["future"])
+def test_parse_future_attributes(enable_future_compat, input, expected):
+    """Shared and future-only cases parse as expected in future mode."""
+    # The enable_future_compat fixture is requested for its side effect.
+    parsed = {}
+    attrs.parse_attributes(input, parsed)
+    assert parsed == expected
-- 
cgit v1.2.1