From b62311f1bcffda936eab4173e2d94f4cd6f75a1f Mon Sep 17 00:00:00 2001 From: Matthew Peveler Date: Sun, 22 May 2022 11:46:25 -0400 Subject: Add future-compat attribute parsing (#255) --- asciidoc/attrs.py | 77 +++++++++++++++++++++-- tests/conftest.py | 9 +++ tests/test_attrs.py | 178 ++++++++++++++++++++++++++++++++-------------------- 3 files changed, 193 insertions(+), 71 deletions(-) create mode 100644 tests/conftest.py diff --git a/asciidoc/attrs.py b/asciidoc/attrs.py index f61b62b..42a6aae 100644 --- a/asciidoc/attrs.py +++ b/asciidoc/attrs.py @@ -1,6 +1,7 @@ import re import typing +from . import get_compat_mode from .utils import get_args, get_kwargs @@ -15,16 +16,85 @@ def parse_attributes(attrs: str, output_dict: typing.Dict) -> None: output_dict: {} attrs: 'hello,world' - output_dict: {'2': 'world', '0': 'hello,world', '1': 'hello'} + output_dict: {'0': 'hello,world', '1': 'hello', '2': 'world'} attrs: '"hello", planet="earth"' - output_dict: {'planet': 'earth', '0': '"hello", planet="earth"', '1': 'hello'} + output_dict: {'0': '"hello", planet="earth"', '1': 'hello', 'planet': 'earth'} """ if not attrs: return output_dict['0'] = attrs # Replace line separators with spaces so line spanning works. 
s = re.sub(r'\s', ' ', attrs) + d = legacy_parse(s) if get_compat_mode() == 1 else future_parse(s) + output_dict.update(d) + assert len(d) > 0 + + +def future_parse(s: str) -> dict: + d = {} + key = '' + value = '' + count = 1 + quote = None + in_quotes = False + had_quotes = False + + def add_value(): + nonlocal count, d, key, value + key = key.strip() + value = value.strip() + if had_quotes: + value = value[1:-1] + + if not value and not had_quotes: + value = None + + if key: + d[key] = value if value else '' + key = '' + else: + d["{}".format(count)] = value + count += 1 + value = '' + + for i in range(len(s)): + char = s[i] + + if char == ',' and not in_quotes: + add_value() + had_quotes = False + elif value and char == '=' and not in_quotes: + key = value + value = '' + elif not in_quotes and (char == '"' or char == "'") \ + and (i == 0 or s[i - 1] != '\\'): + in_quotes = True + quote = char + value += char + elif in_quotes and char == quote and (i == 0 or s[i - 1] != '\\'): + in_quotes = False + had_quotes = True + quote = None + value += char + elif char == '\\' and i < len(s) - 1 and (s[i + 1] == '"' or s[i + 1] == "'"): + pass + else: + value += char + + if key and key[0] == '=' and not value: + value = key + "=" + key = "" + + if not value and s.rstrip()[-1] == ',': + value = ' ' + + if had_quotes or value or key: + add_value() + return d + + +def legacy_parse(s: str) -> dict: d = {} try: d.update(get_args(s)) @@ -47,5 +117,4 @@ def parse_attributes(attrs: str, output_dict: typing.Dict) -> None: for k in list(d.keys()): # Drop any empty positional arguments. 
if d[k] == '': del d[k] - output_dict.update(d) - assert len(d) > 0 + return d diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..2091d6f --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,9 @@ +from asciidoc import set_future_compat, set_legacy_compat +import pytest + + +@pytest.fixture +def enable_future_compat() -> None: + set_future_compat() + yield + set_legacy_compat() diff --git a/tests/test_attrs.py b/tests/test_attrs.py index 9991f7c..c3b90c6 100644 --- a/tests/test_attrs.py +++ b/tests/test_attrs.py @@ -2,9 +2,31 @@ from asciidoc import attrs import pytest -@pytest.mark.parametrize( - "input,expected", - ( +testcases = { + # these test cases fail under future mode + "pure_legacy": ( + # In future mode, all values are always strings + ( + 'height=100,caption="",link="images/octocat.png"', + { + '0': 'height=100,caption="",link="images/octocat.png"', + 'height': 100, + 'caption': '', + 'link': 'images/octocat.png', + }, + ), + ( + "height=100,caption='',link='images/octocat.png'", + { + '0': "height=100,caption='',link='images/octocat.png'", + 'height': 100, + 'caption': '', + 'link': 'images/octocat.png', + }, + ), + ), + # these test cases pass under both legacy and future modes + "legacy": ( # docstring tests ('', {}), ('hello,world', {'0': 'hello,world', '1': 'hello', '2': 'world'}), @@ -14,19 +36,13 @@ import pytest ), # tests taken from # https://github.com/asciidoctor/asciidoctor/blob/main/test/attribute_list_test.rb - # commented out tests are currently supported by asciidoc.py ('quote', {'0': 'quote', '1': 'quote'}), ('"quote"', {'0': '"quote"', '1': 'quote'}), ('""', {'0': '""', '1': ''}), - # ('"ba\"zaar"', {'0': '"ba\"zaar"', '1': 'ba"zaar'}), ("'quote'", {'0': "'quote'", '1': 'quote'}), ("''", {'0': "''", '1': ''}), ('\'', {'0': '\'', '1': '\''}), - # ('name=\'', {'0': 'name=\'', 'name': '\''}), - # ('name=\'{val}', {'0': 'name=\'{val}', 'name': '\'{val}'}), ('\'ba\\\'zaar\'', {'0': '\'ba\\\'zaar\'', '1': 
'ba\'zaar'}), - # ('quote , ', {'0': 'quote , ', '1': 'quote', '2': None}), - # (', John Smith', {'0': ', John Smith', '1': None, '2': 'John Smith'}), ( 'first, second one, third', { @@ -35,85 +51,113 @@ import pytest '2': 'second one', '3': 'third', }, ), - # ( - # 'first,,third,', - # {'0': 'first,,third,', '1': 'first', '2': None, '3': 'third', '4': None} - # ), ('=foo=', {'0': '=foo=', '1': '=foo='}), - # ('foo=bar', {'0': 'foo=bar', 'foo': 'bar'}), ('foo="bar"', {'0': 'foo="bar"', 'foo': 'bar'}), + + ('foo=\'bar\'', {'0': 'foo=\'bar\'', 'foo': 'bar'}), + + ), + # these tests only pass under future mode + # tests taken from + # https://github.com/asciidoctor/asciidoctor/blob/main/test/attribute_list_test.rb + "future": ( + ('"ba\"zaar"', {'0': '"ba\"zaar"', '1': 'ba"zaar'}), + ('name=\'', {'0': 'name=\'', 'name': '\''}), + ('name=\'{val}', {'0': 'name=\'{val}', 'name': '\'{val}'}), + ('quote , ', {'0': 'quote , ', '1': 'quote', '2': None}), + (', John Smith', {'0': ', John Smith', '1': None, '2': 'John Smith'}), + ( + 'first,,third,', + {'0': 'first,,third,', '1': 'first', '2': None, '3': 'third', '4': None} + ), + ('foo=bar', {'0': 'foo=bar', 'foo': 'bar'}), + ('foo=', {'0': 'foo=', 'foo': ''}), + ('foo=,bar=baz', {'0': 'foo=,bar=baz', 'foo': '', 'bar': 'baz'}), ( 'height=100,caption="",link="images/octocat.png"', { '0': 'height=100,caption="",link="images/octocat.png"', - 'height': 100, + 'height': '100', 'caption': '', 'link': 'images/octocat.png', }, ), - ('foo=\'bar\'', {'0': 'foo=\'bar\'', 'foo': 'bar'}), ( "height=100,caption='',link='images/octocat.png'", { '0': "height=100,caption='',link='images/octocat.png'", - 'height': 100, + 'height': '100', 'caption': '', 'link': 'images/octocat.png', }, ), - # ('foo=', {'0': 'foo=', 'foo': ''}), - # ('foo=,bar=baz', {'0': 'foo=,bar=baz', 'foo': '', 'bar': 'baz'}), - # ( - # 'first=value, second=two, third=3', - # { - # '0': 'first=value, second=two, third=3', - # 'first': 'value', - # 'second': 'two', - # 
'third': '3', - # }, - # ), - # ( - # 'first=\'value\', second="value two", third=three', - # { - # '0': 'first=\'value\', second="value two", third=three', - # 'first': 'value', - # 'second': 'value two', - # 'third': 'three', - # }, - # ), - # ( - # " first = 'value', second =\"value two\" , third= three ", # noqa: E501 - # { - # '0': " first = 'value', second =\"value two\" , third= three ", # noqa: E501 - # 'first': 'value', - # 'second': 'value two', - # 'third': 'three', - # }, - # ), - # ( - # 'first, second="value two", third=three, Sherlock Holmes', - # { - # '0': 'first, second="value two", third=three, Sherlock Holmes', - # '1': 'first', - # 'second': 'value two', - # 'third': 'three', - # '4': 'Sherlock Holmes', - # }, - # ), - # ( - # 'first,,third=,,fifth=five', - # { - # '0': 'first,,third=,,fifth=five', - # '1': 'first', - # '2': None, - # 'third': '', - # '4': None, - # 'fifth': 'five', - # }, - # ), + ( + 'first=value, second=two, third=3', + { + '0': 'first=value, second=two, third=3', + 'first': 'value', + 'second': 'two', + 'third': '3', + }, + ), + ( + 'first=\'value\', second="value two", third=three', + { + '0': 'first=\'value\', second="value two", third=three', + 'first': 'value', + 'second': 'value two', + 'third': 'three', + }, + ), + ( + " first = 'value', second =\"value two\" , third= three ", # noqa: E501 + { + '0': " first = 'value', second =\"value two\" , third= three ", # noqa: E501 + 'first': 'value', + 'second': 'value two', + 'third': 'three', + }, + ), + ( + 'first, second="value two", third=three, Sherlock Holmes', + { + '0': 'first, second="value two", third=three, Sherlock Holmes', + '1': 'first', + 'second': 'value two', + 'third': 'three', + '4': 'Sherlock Holmes', + }, + ), + ( + 'first,,third=,,fifth=five', + { + '0': 'first,,third=,,fifth=five', + '1': 'first', + '2': None, + 'third': '', + '4': None, + 'fifth': 'five', + }, + ), ) +} + + +@pytest.mark.parametrize( + "input,expected", + testcases["legacy"] + 
testcases["pure_legacy"], ) def test_parse_attributes(input, expected): output = dict() attrs.parse_attributes(input, output) assert output == expected + + +@pytest.mark.parametrize( + "input,expected", + testcases['legacy'] + testcases["future"], +) +def test_parse_future_attributes(enable_future_compat, input, expected): + output = dict() + attrs.parse_attributes(input, output) + assert output == expected -- cgit v1.2.1