summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Peveler <matt.peveler@gmail.com>2022-05-22 11:46:25 -0400
committerGitHub <noreply@github.com>2022-05-22 11:46:25 -0400
commitb62311f1bcffda936eab4173e2d94f4cd6f75a1f (patch)
treec6b928026597bfc27422db470bdeeb37ff600f9e
parent789e362b22261b231eb016e87b06c74d9ec324b5 (diff)
downloadasciidoc-py3-b62311f1bcffda936eab4173e2d94f4cd6f75a1f.tar.gz
Add future-compat attribute parsing (#255)
-rw-r--r--asciidoc/attrs.py77
-rw-r--r--tests/conftest.py9
-rw-r--r--tests/test_attrs.py178
3 files changed, 193 insertions, 71 deletions
diff --git a/asciidoc/attrs.py b/asciidoc/attrs.py
index f61b62b..42a6aae 100644
--- a/asciidoc/attrs.py
+++ b/asciidoc/attrs.py
@@ -1,6 +1,7 @@
import re
import typing
+from . import get_compat_mode
from .utils import get_args, get_kwargs
@@ -15,16 +16,85 @@ def parse_attributes(attrs: str, output_dict: typing.Dict) -> None:
output_dict: {}
attrs: 'hello,world'
- output_dict: {'2': 'world', '0': 'hello,world', '1': 'hello'}
+ output_dict: {'0': 'hello,world', '1': 'hello', '2': 'world'}
attrs: '"hello", planet="earth"'
- output_dict: {'planet': 'earth', '0': '"hello", planet="earth"', '1': 'hello'}
+ output_dict: {'0': '"hello", planet="earth"', '1': 'hello', 'planet': 'earth'}
"""
if not attrs:
return
output_dict['0'] = attrs
# Replace line separators with spaces so line spanning works.
s = re.sub(r'\s', ' ', attrs)
+ d = legacy_parse(s) if get_compat_mode() == 1 else future_parse(s)
+ output_dict.update(d)
+ assert len(d) > 0
+
+
def future_parse(s: str) -> dict:
    """Parse an attribute list string into a dict (future-compat rules).

    The string is split on commas that are outside quotes. An entry of the
    form ``name=value`` is stored under ``name``; any other entry is stored
    under its 1-based position, rendered as a string. Named entries still
    consume a position number, so ``'a, b=c, d'`` yields keys ``'1'``,
    ``'b'`` and ``'3'``.

    Values are always strings (no numeric conversion). A value wrapped in
    single or double quotes has the quotes removed, and a backslash that
    precedes a quote character is dropped. An empty unquoted positional
    entry is stored as ``None``; an empty named entry is stored as ``''``.

    Args:
        s: The attribute list, with line separators already replaced by
            spaces.

    Returns:
        A dict mapping attribute names / positions to ``str`` or ``None``.
        An empty input yields an empty dict.
    """
    parsed = {}
    key = ''
    value = ''
    position = 1
    quote = None  # the quote character currently open, None outside quotes
    had_quotes = False  # True once the current entry has closed a quote

    def add_value():
        """Store the entry accumulated so far and reset the accumulators."""
        nonlocal position, key, value
        name = key.strip()
        text = value.strip()
        if had_quotes:
            # Strip the surrounding quote characters.
            text = text[1:-1]

        if name:
            parsed[name] = text or ''
        else:
            # Only an unquoted empty positional entry becomes None; an
            # explicitly quoted empty string stays ''.
            parsed[str(position)] = text if (text or had_quotes) else None
        position += 1
        key = ''
        value = ''

    last = len(s) - 1
    for i, char in enumerate(s):
        escaped = i > 0 and s[i - 1] == '\\'

        if quote is None and char == ',':
            add_value()
            had_quotes = False
        elif quote is None and char == '=' and value and not key:
            # Only the first '=' of an entry separates name from value;
            # later ones (e.g. inside an unquoted URL) belong to the value.
            key = value
            value = ''
        elif quote is None and char in ('"', "'") and not escaped:
            quote = char
            value += char
        elif quote is not None and char == quote and not escaped:
            quote = None
            had_quotes = True
            value += char
        elif char == '\\' and i < last and s[i + 1] in ('"', "'"):
            # Drop the backslash of an escaped quote; the quote itself is
            # appended on the next iteration.
            continue
        else:
            value += char

    # Something like '=foo=' is not a name/value pair: restore it as a
    # plain positional value.
    if key.startswith('=') and not value:
        value = key + '='
        key = ''

    # A trailing comma introduces one more (empty) positional entry.
    if not value and s.rstrip().endswith(','):
        value = ' '

    if had_quotes or value or key:
        add_value()
    return parsed
+
+
+def legacy_parse(s: str) -> dict:
d = {}
try:
d.update(get_args(s))
@@ -47,5 +117,4 @@ def parse_attributes(attrs: str, output_dict: typing.Dict) -> None:
for k in list(d.keys()): # Drop any empty positional arguments.
if d[k] == '':
del d[k]
- output_dict.update(d)
- assert len(d) > 0
+ return d
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..2091d6f
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,9 @@
from typing import Iterator

import pytest

from asciidoc import set_future_compat, set_legacy_compat


@pytest.fixture
def enable_future_compat() -> Iterator[None]:
    """Run the requesting test with future-compat mode switched on.

    Calls ``set_future_compat()`` before the test body and
    ``set_legacy_compat()`` after it. The fixture yields no value; tests
    request it only for this side effect.
    """
    set_future_compat()
    yield
    set_legacy_compat()
diff --git a/tests/test_attrs.py b/tests/test_attrs.py
index 9991f7c..c3b90c6 100644
--- a/tests/test_attrs.py
+++ b/tests/test_attrs.py
@@ -2,9 +2,31 @@ from asciidoc import attrs
import pytest
-@pytest.mark.parametrize(
- "input,expected",
- (
+testcases = {
+ # these test cases fail under future mode
+ "pure_legacy": (
+ # In future mode, all values are always strings
+ (
+ 'height=100,caption="",link="images/octocat.png"',
+ {
+ '0': 'height=100,caption="",link="images/octocat.png"',
+ 'height': 100,
+ 'caption': '',
+ 'link': 'images/octocat.png',
+ },
+ ),
+ (
+ "height=100,caption='',link='images/octocat.png'",
+ {
+ '0': "height=100,caption='',link='images/octocat.png'",
+ 'height': 100,
+ 'caption': '',
+ 'link': 'images/octocat.png',
+ },
+ ),
+ ),
+ # these test cases pass under both legacy and future modes
+ "legacy": (
# docstring tests
('', {}),
('hello,world', {'0': 'hello,world', '1': 'hello', '2': 'world'}),
@@ -14,19 +36,13 @@ import pytest
),
# tests taken from
# https://github.com/asciidoctor/asciidoctor/blob/main/test/attribute_list_test.rb
- # commented out tests are currently supported by asciidoc.py
('quote', {'0': 'quote', '1': 'quote'}),
('"quote"', {'0': '"quote"', '1': 'quote'}),
('""', {'0': '""', '1': ''}),
- # ('"ba\"zaar"', {'0': '"ba\"zaar"', '1': 'ba"zaar'}),
("'quote'", {'0': "'quote'", '1': 'quote'}),
("''", {'0': "''", '1': ''}),
('\'', {'0': '\'', '1': '\''}),
- # ('name=\'', {'0': 'name=\'', 'name': '\''}),
- # ('name=\'{val}', {'0': 'name=\'{val}', 'name': '\'{val}'}),
('\'ba\\\'zaar\'', {'0': '\'ba\\\'zaar\'', '1': 'ba\'zaar'}),
- # ('quote , ', {'0': 'quote , ', '1': 'quote', '2': None}),
- # (', John Smith', {'0': ', John Smith', '1': None, '2': 'John Smith'}),
(
'first, second one, third',
{
@@ -35,85 +51,113 @@ import pytest
'2': 'second one', '3': 'third',
},
),
- # (
- # 'first,,third,',
- # {'0': 'first,,third,', '1': 'first', '2': None, '3': 'third', '4': None}
- # ),
('=foo=', {'0': '=foo=', '1': '=foo='}),
- # ('foo=bar', {'0': 'foo=bar', 'foo': 'bar'}),
('foo="bar"', {'0': 'foo="bar"', 'foo': 'bar'}),
+
+ ('foo=\'bar\'', {'0': 'foo=\'bar\'', 'foo': 'bar'}),
+
+ ),
+ # these tests only pass under future mode
+ # tests taken from
+ # https://github.com/asciidoctor/asciidoctor/blob/main/test/attribute_list_test.rb
+ "future": (
+ ('"ba\"zaar"', {'0': '"ba\"zaar"', '1': 'ba"zaar'}),
+ ('name=\'', {'0': 'name=\'', 'name': '\''}),
+ ('name=\'{val}', {'0': 'name=\'{val}', 'name': '\'{val}'}),
+ ('quote , ', {'0': 'quote , ', '1': 'quote', '2': None}),
+ (', John Smith', {'0': ', John Smith', '1': None, '2': 'John Smith'}),
+ (
+ 'first,,third,',
+ {'0': 'first,,third,', '1': 'first', '2': None, '3': 'third', '4': None}
+ ),
+ ('foo=bar', {'0': 'foo=bar', 'foo': 'bar'}),
+ ('foo=', {'0': 'foo=', 'foo': ''}),
+ ('foo=,bar=baz', {'0': 'foo=,bar=baz', 'foo': '', 'bar': 'baz'}),
(
'height=100,caption="",link="images/octocat.png"',
{
'0': 'height=100,caption="",link="images/octocat.png"',
- 'height': 100,
+ 'height': '100',
'caption': '',
'link': 'images/octocat.png',
},
),
- ('foo=\'bar\'', {'0': 'foo=\'bar\'', 'foo': 'bar'}),
(
"height=100,caption='',link='images/octocat.png'",
{
'0': "height=100,caption='',link='images/octocat.png'",
- 'height': 100,
+ 'height': '100',
'caption': '',
'link': 'images/octocat.png',
},
),
- # ('foo=', {'0': 'foo=', 'foo': ''}),
- # ('foo=,bar=baz', {'0': 'foo=,bar=baz', 'foo': '', 'bar': 'baz'}),
- # (
- # 'first=value, second=two, third=3',
- # {
- # '0': 'first=value, second=two, third=3',
- # 'first': 'value',
- # 'second': 'two',
- # 'third': '3',
- # },
- # ),
- # (
- # 'first=\'value\', second="value two", third=three',
- # {
- # '0': 'first=\'value\', second="value two", third=three',
- # 'first': 'value',
- # 'second': 'value two',
- # 'third': 'three',
- # },
- # ),
- # (
- # " first = 'value', second =\"value two\" , third= three ", # noqa: E501
- # {
- # '0': " first = 'value', second =\"value two\" , third= three ", # noqa: E501
- # 'first': 'value',
- # 'second': 'value two',
- # 'third': 'three',
- # },
- # ),
- # (
- # 'first, second="value two", third=three, Sherlock Holmes',
- # {
- # '0': 'first, second="value two", third=three, Sherlock Holmes',
- # '1': 'first',
- # 'second': 'value two',
- # 'third': 'three',
- # '4': 'Sherlock Holmes',
- # },
- # ),
- # (
- # 'first,,third=,,fifth=five',
- # {
- # '0': 'first,,third=,,fifth=five',
- # '1': 'first',
- # '2': None,
- # 'third': '',
- # '4': None,
- # 'fifth': 'five',
- # },
- # ),
+ (
+ 'first=value, second=two, third=3',
+ {
+ '0': 'first=value, second=two, third=3',
+ 'first': 'value',
+ 'second': 'two',
+ 'third': '3',
+ },
+ ),
+ (
+ 'first=\'value\', second="value two", third=three',
+ {
+ '0': 'first=\'value\', second="value two", third=three',
+ 'first': 'value',
+ 'second': 'value two',
+ 'third': 'three',
+ },
+ ),
+ (
+ " first = 'value', second =\"value two\" , third= three ", # noqa: E501
+ {
+ '0': " first = 'value', second =\"value two\" , third= three ", # noqa: E501
+ 'first': 'value',
+ 'second': 'value two',
+ 'third': 'three',
+ },
+ ),
+ (
+ 'first, second="value two", third=three, Sherlock Holmes',
+ {
+ '0': 'first, second="value two", third=three, Sherlock Holmes',
+ '1': 'first',
+ 'second': 'value two',
+ 'third': 'three',
+ '4': 'Sherlock Holmes',
+ },
+ ),
+ (
+ 'first,,third=,,fifth=five',
+ {
+ '0': 'first,,third=,,fifth=five',
+ '1': 'first',
+ '2': None,
+ 'third': '',
+ '4': None,
+ 'fifth': 'five',
+ },
+ ),
)
+}
+
+
@pytest.mark.parametrize(
    "input,expected",
    testcases["legacy"] + testcases["pure_legacy"],
)
def test_parse_attributes(input, expected):
    """Check parse_attributes on the shared and legacy-only cases.

    Runs without the ``enable_future_compat`` fixture.
    """
    parsed = {}
    attrs.parse_attributes(input, parsed)
    assert parsed == expected
+
+
@pytest.mark.parametrize(
    "input,expected",
    testcases['legacy'] + testcases["future"],
)
def test_parse_future_attributes(enable_future_compat, input, expected):
    """Check parse_attributes on the shared and future-only cases.

    Requests the ``enable_future_compat`` fixture so the parser runs in
    future-compat mode.
    """
    parsed = {}
    attrs.parse_attributes(input, parsed)
    assert parsed == expected