From 45fb266f0f673724292759fbd7697d452a32f3b8 Mon Sep 17 00:00:00 2001 From: Matthew Peveler Date: Tue, 1 Mar 2022 10:35:33 -0500 Subject: Split attribute parsing into own module with tests (#246) --- asciidoc/asciidoc.py | 61 +------------------------- asciidoc/attrs.py | 51 ++++++++++++++++++++++ tests/test_attrs.py | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 172 insertions(+), 59 deletions(-) create mode 100644 asciidoc/attrs.py create mode 100644 tests/test_attrs.py diff --git a/asciidoc/asciidoc.py b/asciidoc/asciidoc.py index b158e44..f9607a6 100644 --- a/asciidoc/asciidoc.py +++ b/asciidoc/asciidoc.py @@ -32,6 +32,7 @@ import unicodedata from collections import OrderedDict +from .attrs import parse_attributes from .blocks.table import parse_table_span_spec, Cell, Column from .collections import AttrDict, InsensitiveDict from .exceptions import EAsciiDoc @@ -146,59 +147,6 @@ def safe_filename(fname, parentdir): return fname -def parse_attributes(attrs, dict): - """Update a dictionary with name/value attributes from the attrs string. - The attrs string is a comma separated list of values and keyword name=value - pairs. Values must precede keywords and are named '1','2'... The entire - attributes list is named '0'. If keywords are specified string values must - be quoted. Examples: - - attrs: '' - dict: {} - - attrs: 'hello,world' - dict: {'2': 'world', '0': 'hello,world', '1': 'hello'} - - attrs: '"hello", planet="earth"' - dict: {'planet': 'earth', '0': '"hello",planet="earth"', '1': 'hello'} - """ - def f(*args, **keywords): - # Name and add arguments '1','2'... to keywords. - for i in range(len(args)): - if not str(i + 1) in keywords: - keywords[str(i + 1)] = args[i] - return keywords - - if not attrs: - return - dict['0'] = attrs - # Replace line separators with spaces so line spanning works. 
- s = re.sub(r'\s', ' ', attrs) - d = {} - try: - d.update(utils.get_args(s)) - d.update(utils.get_kwargs(s)) - for v in list(d.values()): - if not (isinstance(v, str) or isinstance(v, int) or isinstance(v, float) or v is None): - raise Exception - except Exception: - s = s.replace('"', '\\"') - s = s.split(',') - s = ['"' + x.strip() + '"' for x in s] - s = ','.join(s) - try: - d = {} - d.update(utils.get_args(s)) - d.update(utils.get_kwargs(s)) - except Exception: - return # If there's a syntax error leave with {0}=attrs. - for k in list(d.keys()): # Drop any empty positional arguments. - if d[k] == '': - del d[k] - dict.update(d) - assert len(d) > 0 - - def parse_named_attributes(s, attrs): """Update a attrs dictionary with name="value" attributes from the s string. Returns False if invalid syntax. @@ -218,7 +166,7 @@ def parse_named_attributes(s, attrs): return False -def parse_list(s): +def parse_list(s) -> typing.Tuple: """Parse comma separated string of Python literals. Return a tuple of of parsed values.""" try: @@ -242,11 +190,6 @@ def parse_options(options, allowed, errmsg): return tuple(result) -def symbolize(s): - """Drop non-symbol characters and convert to lowercase.""" - return re.sub(r'[^\w\-_]', '', s).lower() - - def is_name(s): """Return True if s is valid attribute, macro or tag name (starts with alpha containing alphanumeric and dashes only).""" diff --git a/asciidoc/attrs.py b/asciidoc/attrs.py new file mode 100644 index 0000000..f61b62b --- /dev/null +++ b/asciidoc/attrs.py @@ -0,0 +1,51 @@ +import re +import typing + +from .utils import get_args, get_kwargs + + +def parse_attributes(attrs: str, output_dict: typing.Dict) -> None: + """Update a dictionary with name/value attributes from the attrs string. + The attrs string is a comma separated list of values and keyword name=value + pairs. Values must precede keywords and are named '1','2'... The entire + attributes list is named '0'. If keywords are specified string values must + be quoted. 
Examples: + + attrs: '' + output_dict: {} + + attrs: 'hello,world' + output_dict: {'2': 'world', '0': 'hello,world', '1': 'hello'} + + attrs: '"hello", planet="earth"' + output_dict: {'planet': 'earth', '0': '"hello", planet="earth"', '1': 'hello'} + """ + if not attrs: + return + output_dict['0'] = attrs + # Replace line separators with spaces so line spanning works. + s = re.sub(r'\s', ' ', attrs) + d = {} + try: + d.update(get_args(s)) + d.update(get_kwargs(s)) + for v in list(d.values()): + if not (isinstance(v, str) + or isinstance(v, int) or isinstance(v, float) or v is None): + raise Exception + except Exception: + s = s.replace('"', '\\"') + s = s.split(',') + s = ['"' + x.strip() + '"' for x in s] + s = ','.join(s) + try: + d = {} + d.update(get_args(s)) + d.update(get_kwargs(s)) + except Exception: + return # If there's a syntax error leave with {0}=attrs. + for k in list(d.keys()): # Drop any empty positional arguments. + if d[k] == '': + del d[k] + output_dict.update(d) + assert len(d) > 0 diff --git a/tests/test_attrs.py b/tests/test_attrs.py new file mode 100644 index 0000000..9991f7c --- /dev/null +++ b/tests/test_attrs.py @@ -0,0 +1,119 @@ +from asciidoc import attrs +import pytest + + +@pytest.mark.parametrize( + "input,expected", + ( + # docstring tests + ('', {}), + ('hello,world', {'0': 'hello,world', '1': 'hello', '2': 'world'}), + ( + '"hello", planet="earth"', + {'0': '"hello", planet="earth"', '1': 'hello', 'planet': 'earth'} + ), + # tests taken from + # https://github.com/asciidoctor/asciidoctor/blob/main/test/attribute_list_test.rb + # commented out tests are not currently supported by asciidoc.py + ('quote', {'0': 'quote', '1': 'quote'}), + ('"quote"', {'0': '"quote"', '1': 'quote'}), + ('""', {'0': '""', '1': ''}), + # ('"ba\"zaar"', {'0': '"ba\"zaar"', '1': 'ba"zaar'}), + ("'quote'", {'0': "'quote'", '1': 'quote'}), + ("''", {'0': "''", '1': ''}), + ('\'', {'0': '\'', '1': '\''}), + # ('name=\'', {'0': 'name=\'', 'name': '\''}), + # 
('name=\'{val}', {'0': 'name=\'{val}', 'name': '\'{val}'}), + ('\'ba\\\'zaar\'', {'0': '\'ba\\\'zaar\'', '1': 'ba\'zaar'}), + # ('quote , ', {'0': 'quote , ', '1': 'quote', '2': None}), + # (', John Smith', {'0': ', John Smith', '1': None, '2': 'John Smith'}), + ( + 'first, second one, third', + { + '0': 'first, second one, third', + '1': 'first', + '2': 'second one', '3': 'third', + }, + ), + # ( + # 'first,,third,', + # {'0': 'first,,third,', '1': 'first', '2': None, '3': 'third', '4': None} + # ), + ('=foo=', {'0': '=foo=', '1': '=foo='}), + # ('foo=bar', {'0': 'foo=bar', 'foo': 'bar'}), + ('foo="bar"', {'0': 'foo="bar"', 'foo': 'bar'}), + ( + 'height=100,caption="",link="images/octocat.png"', + { + '0': 'height=100,caption="",link="images/octocat.png"', + 'height': 100, + 'caption': '', + 'link': 'images/octocat.png', + }, + ), + ('foo=\'bar\'', {'0': 'foo=\'bar\'', 'foo': 'bar'}), + ( + "height=100,caption='',link='images/octocat.png'", + { + '0': "height=100,caption='',link='images/octocat.png'", + 'height': 100, + 'caption': '', + 'link': 'images/octocat.png', + }, + ), + # ('foo=', {'0': 'foo=', 'foo': ''}), + # ('foo=,bar=baz', {'0': 'foo=,bar=baz', 'foo': '', 'bar': 'baz'}), + # ( + # 'first=value, second=two, third=3', + # { + # '0': 'first=value, second=two, third=3', + # 'first': 'value', + # 'second': 'two', + # 'third': '3', + # }, + # ), + # ( + # 'first=\'value\', second="value two", third=three', + # { + # '0': 'first=\'value\', second="value two", third=three', + # 'first': 'value', + # 'second': 'value two', + # 'third': 'three', + # }, + # ), + # ( + # " first = 'value', second =\"value two\" , third= three ", # noqa: E501 + # { + # '0': " first = 'value', second =\"value two\" , third= three ", # noqa: E501 + # 'first': 'value', + # 'second': 'value two', + # 'third': 'three', + # }, + # ), + # ( + # 'first, second="value two", third=three, Sherlock Holmes', + # { + # '0': 'first, second="value two", third=three, Sherlock Holmes', + # '1': 
'first', + # 'second': 'value two', + # 'third': 'three', + # '4': 'Sherlock Holmes', + # }, + # ), + # ( + # 'first,,third=,,fifth=five', + # { + # '0': 'first,,third=,,fifth=five', + # '1': 'first', + # '2': None, + # 'third': '', + # '4': None, + # 'fifth': 'five', + # }, + # ), + ) +) +def test_parse_attributes(input, expected): + output = dict() + attrs.parse_attributes(input, output) + assert output == expected -- cgit v1.2.1