From 45fb266f0f673724292759fbd7697d452a32f3b8 Mon Sep 17 00:00:00 2001
From: Matthew Peveler <matt.peveler@gmail.com>
Date: Tue, 1 Mar 2022 10:35:33 -0500
Subject: Split attribute parsing into own module with tests (#246)

---
 asciidoc/asciidoc.py |  61 +-------------------------
 asciidoc/attrs.py    |  51 ++++++++++++++++++++++
 tests/test_attrs.py  | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 172 insertions(+), 59 deletions(-)
 create mode 100644 asciidoc/attrs.py
 create mode 100644 tests/test_attrs.py

diff --git a/asciidoc/asciidoc.py b/asciidoc/asciidoc.py
index b158e44..f9607a6 100644
--- a/asciidoc/asciidoc.py
+++ b/asciidoc/asciidoc.py
@@ -32,6 +32,7 @@ import unicodedata
 
 from collections import OrderedDict
 
+from .attrs import parse_attributes
 from .blocks.table import parse_table_span_spec, Cell, Column
 from .collections import AttrDict, InsensitiveDict
 from .exceptions import EAsciiDoc
@@ -146,59 +147,6 @@ def safe_filename(fname, parentdir):
     return fname
 
 
-def parse_attributes(attrs, dict):
-    """Update a dictionary with name/value attributes from the attrs string.
-    The attrs string is a comma separated list of values and keyword name=value
-    pairs. Values must precede keywords and are named '1','2'... The entire
-    attributes list is named '0'. If keywords are specified string values must
-    be quoted. Examples:
-
-    attrs: ''
-    dict: {}
-
-    attrs: 'hello,world'
-    dict: {'2': 'world', '0': 'hello,world', '1': 'hello'}
-
-    attrs: '"hello", planet="earth"'
-    dict: {'planet': 'earth', '0': '"hello",planet="earth"', '1': 'hello'}
-    """
-    def f(*args, **keywords):
-        # Name and add arguments '1','2'... to keywords.
-        for i in range(len(args)):
-            if not str(i + 1) in keywords:
-                keywords[str(i + 1)] = args[i]
-        return keywords
-
-    if not attrs:
-        return
-    dict['0'] = attrs
-    # Replace line separators with spaces so line spanning works.
-    s = re.sub(r'\s', ' ', attrs)
-    d = {}
-    try:
-        d.update(utils.get_args(s))
-        d.update(utils.get_kwargs(s))
-        for v in list(d.values()):
-            if not (isinstance(v, str) or isinstance(v, int) or isinstance(v, float) or v is None):
-                raise Exception
-    except Exception:
-        s = s.replace('"', '\\"')
-        s = s.split(',')
-        s = ['"' + x.strip() + '"' for x in s]
-        s = ','.join(s)
-        try:
-            d = {}
-            d.update(utils.get_args(s))
-            d.update(utils.get_kwargs(s))
-        except Exception:
-            return  # If there's a syntax error leave with {0}=attrs.
-        for k in list(d.keys()):  # Drop any empty positional arguments.
-            if d[k] == '':
-                del d[k]
-    dict.update(d)
-    assert len(d) > 0
-
-
 def parse_named_attributes(s, attrs):
     """Update a attrs dictionary with name="value" attributes from the s string.
     Returns False if invalid syntax.
@@ -218,7 +166,7 @@ def parse_named_attributes(s, attrs):
         return False
 
 
-def parse_list(s):
+def parse_list(s) -> typing.Tuple:
     """Parse comma separated string of Python literals. Return a tuple of of
     parsed values."""
     try:
@@ -242,11 +190,6 @@ def parse_options(options, allowed, errmsg):
     return tuple(result)
 
 
-def symbolize(s):
-    """Drop non-symbol characters and convert to lowercase."""
-    return re.sub(r'[^\w\-_]', '', s).lower()
-
-
 def is_name(s):
     """Return True if s is valid attribute, macro or tag name
     (starts with alpha containing alphanumeric and dashes only)."""
diff --git a/asciidoc/attrs.py b/asciidoc/attrs.py
new file mode 100644
index 0000000..f61b62b
--- /dev/null
+++ b/asciidoc/attrs.py
@@ -0,0 +1,51 @@
+import re
+import typing
+
+from .utils import get_args, get_kwargs
+
+
+def parse_attributes(attrs: str, output_dict: typing.Dict) -> None:
+    """Update a dictionary with name/value attributes from the attrs string.
+    The attrs string is a comma separated list of values and keyword name=value
+    pairs. Values must precede keywords and are named '1','2'... The entire
+    attributes list is named '0'. If keywords are specified string values must
+    be quoted. Examples:
+
+    attrs: ''
+    output_dict: {}
+
+    attrs: 'hello,world'
+    output_dict: {'2': 'world', '0': 'hello,world', '1': 'hello'}
+
+    attrs: '"hello", planet="earth"'
+    output_dict: {'planet': 'earth', '0': '"hello", planet="earth"', '1': 'hello'}
+    """
+    if not attrs:
+        return
+    output_dict['0'] = attrs
+    # Replace line separators with spaces so line spanning works.
+    s = re.sub(r'\s', ' ', attrs)
+    d = {}
+    try:
+        d.update(get_args(s))
+        d.update(get_kwargs(s))
+        for v in list(d.values()):
+            if not (isinstance(v, str)
+                    or isinstance(v, int) or isinstance(v, float) or v is None):
+                raise Exception
+    except Exception:
+        s = s.replace('"', '\\"')
+        s = s.split(',')
+        s = ['"' + x.strip() + '"' for x in s]
+        s = ','.join(s)
+        try:
+            d = {}
+            d.update(get_args(s))
+            d.update(get_kwargs(s))
+        except Exception:
+            return  # If there's a syntax error leave with {0}=attrs.
+        for k in list(d.keys()):  # Drop any empty positional arguments.
+            if d[k] == '':
+                del d[k]
+    output_dict.update(d)
+    assert len(d) > 0
diff --git a/tests/test_attrs.py b/tests/test_attrs.py
new file mode 100644
index 0000000..9991f7c
--- /dev/null
+++ b/tests/test_attrs.py
@@ -0,0 +1,119 @@
+from asciidoc import attrs
+import pytest
+
+
+@pytest.mark.parametrize(
+    "input,expected",
+    (
+        # docstring tests
+        ('', {}),
+        ('hello,world', {'0': 'hello,world', '1': 'hello', '2': 'world'}),
+        (
+            '"hello", planet="earth"',
+            {'0': '"hello", planet="earth"', '1': 'hello', 'planet': 'earth'}
+        ),
+        # tests taken from
+        # https://github.com/asciidoctor/asciidoctor/blob/main/test/attribute_list_test.rb
+        # commented out tests are not currently supported by asciidoc.py
+        ('quote', {'0': 'quote', '1': 'quote'}),
+        ('"quote"', {'0': '"quote"', '1': 'quote'}),
+        ('""', {'0': '""', '1': ''}),
+        # ('"ba\"zaar"', {'0': '"ba\"zaar"', '1': 'ba"zaar'}),
+        ("'quote'", {'0': "'quote'", '1': 'quote'}),
+        ("''", {'0': "''", '1': ''}),
+        ('\'', {'0': '\'', '1': '\''}),
+        # ('name=\'', {'0': 'name=\'', 'name': '\''}),
+        # ('name=\'{val}', {'0': 'name=\'{val}', 'name': '\'{val}'}),
+        ('\'ba\\\'zaar\'', {'0': '\'ba\\\'zaar\'', '1': 'ba\'zaar'}),
+        # ('quote , ', {'0': 'quote , ', '1': 'quote', '2': None}),
+        # (', John Smith', {'0': ', John Smith', '1': None, '2': 'John Smith'}),
+        (
+            'first, second one, third',
+            {
+                '0': 'first, second one, third',
+                '1': 'first',
+                '2': 'second one', '3': 'third',
+            },
+        ),
+        # (
+        #     'first,,third,',
+        #     {'0': 'first,,third,', '1': 'first', '2': None, '3': 'third', '4': None}
+        # ),
+        ('=foo=', {'0': '=foo=', '1': '=foo='}),
+        # ('foo=bar', {'0': 'foo=bar', 'foo': 'bar'}),
+        ('foo="bar"', {'0': 'foo="bar"', 'foo': 'bar'}),
+        (
+            'height=100,caption="",link="images/octocat.png"',
+            {
+                '0': 'height=100,caption="",link="images/octocat.png"',
+                'height': 100,
+                'caption': '',
+                'link': 'images/octocat.png',
+            },
+        ),
+        ('foo=\'bar\'', {'0': 'foo=\'bar\'', 'foo': 'bar'}),
+        (
+            "height=100,caption='',link='images/octocat.png'",
+            {
+                '0': "height=100,caption='',link='images/octocat.png'",
+                'height': 100,
+                'caption': '',
+                'link': 'images/octocat.png',
+            },
+        ),
+        # ('foo=', {'0': 'foo=', 'foo': ''}),
+        # ('foo=,bar=baz', {'0': 'foo=,bar=baz', 'foo': '', 'bar': 'baz'}),
+        # (
+        #     'first=value, second=two, third=3',
+        #     {
+        #         '0': 'first=value, second=two, third=3',
+        #         'first': 'value',
+        #         'second': 'two',
+        #         'third': '3',
+        #     },
+        # ),
+        # (
+        #     'first=\'value\', second="value two", third=three',
+        #     {
+        #         '0': 'first=\'value\', second="value two", third=three',
+        #         'first': 'value',
+        #         'second': 'value two',
+        #         'third': 'three',
+        #     },
+        # ),
+        # (
+        #     "     first    =     'value', second     =\"value two\"     , third=       three      ", # noqa: E501
+        #     {
+        #         '0': "     first    =     'value', second     =\"value two\"     , third=       three      ", # noqa: E501
+        #         'first': 'value',
+        #         'second': 'value two',
+        #         'third': 'three',
+        #     },
+        # ),
+        # (
+        #     'first, second="value two", third=three, Sherlock Holmes',
+        #     {
+        #         '0': 'first, second="value two", third=three, Sherlock Holmes',
+        #         '1': 'first',
+        #         'second': 'value two',
+        #         'third': 'three',
+        #         '4': 'Sherlock Holmes',
+        #     },
+        # ),
+        # (
+        #     'first,,third=,,fifth=five',
+        #     {
+        #         '0': 'first,,third=,,fifth=five',
+        #         '1': 'first',
+        #         '2': None,
+        #         'third': '',
+        #         '4': None,
+        #         'fifth': 'five',
+        #     },
+        # ),
+    )
+)
+def test_parse_attributes(input, expected):
+    output = dict()
+    attrs.parse_attributes(input, output)
+    assert output == expected
-- 
cgit v1.2.1