diff options
author | Seth M Morton <seth.m.morton@gmail.com> | 2016-05-01 16:41:31 -0700 |
---|---|---|
committer | Seth M Morton <seth.m.morton@gmail.com> | 2016-05-01 16:41:31 -0700 |
commit | d741bc05b820443e8076d2b63d64ed98ba860306 (patch) | |
tree | d9b45153c3a0acdbbdfd7800e280a2b9b94d688a | |
parent | df00419f4e1a1c4ec469b4b3ae76f98cb7c12c3a (diff) | |
download | natsort-d741bc05b820443e8076d2b63d64ed98ba860306.tar.gz |
Added new string and path splitting function generators.
These complete the required components to make natsort more functional.
-rw-r--r-- | natsort/utils.py | 71 | ||||
-rw-r--r-- | test_natsort/slow_splitters.py | 4 | ||||
-rw-r--r-- | test_natsort/test_utils.py | 147 |
3 files changed, 98 insertions, 124 deletions
diff --git a/natsort/utils.py b/natsort/utils.py index 6c2362c..6bffdf9 100644 --- a/natsort/utils.py +++ b/natsort/utils.py @@ -146,39 +146,21 @@ def _natsort_key(val, key, alg): if key is not None: val = key(val) - # If this is a path, convert it. - # An AttrubuteError is raised if not a string. - split_as_path = False - if alg & ns.PATH: - try: - val = _path_splitter(val) - except AttributeError: - pass - else: - # Record that this string was split as a path so that - # we don't set PATH in the recursive call. - split_as_path = True - # Assume the input are strings, which is the most common case. - # Apply the string modification if needed. - orig_val = val try: if use_locale and dumb_sort(): alg |= ns._DUMB - lowfirst = alg & ns.LOWERCASEFIRST - dumb = alg & ns._DUMB - val = _pre_split_function(alg)(val) - gl = alg & ns.GROUPLETTERS - ret = tuple(_number_extracter(val, - regex, - num_function, - use_locale, - gl or (use_locale and dumb))) - # Handle NaN. - if any(x != x for x in ret): - ret = _fix_nan(ret, alg) - val = orig_val if (alg & ns._DUMB) else val - return _post_string_parse_function(alg, null_string)(ret, val) + split = _parse_string_function( + alg, + null_string if use_locale else '', + regex.split, + _pre_split_function(alg), + _post_split_function(alg), + _post_string_parse_function(alg, null_string) + ) + if alg & ns.PATH: + split = _parse_path_function(split) + return split(val) except (TypeError, AttributeError): # Check if it is a bytes type, and if so return as a # one element tuple. @@ -186,12 +168,8 @@ def _natsort_key(val, key, alg): return _parse_bytes_function(alg)(val) # If not strings, assume it is an iterable that must # be parsed recursively. Do not apply the key recursively. - # If this string was split as a path, turn off 'PATH'. try: - was_path = alg & ns.PATH - newalg = alg & ns._ALL_BUT_PATH - newalg |= (was_path * (not split_as_path)) - return tuple([_natsort_key(x, None, newalg) for x in val]) + return tuple([_natsort_key(x, None, alg) for x in val]) # If there is still an error, it must be a number. # Return as-is, with a leading empty string. except TypeError: @@ -225,7 +203,7 @@ def _number_extracter(s, regex, numconv, use_locale, group_letters): def _parse_bytes_function(alg): - """Create a function that will properly format a bytes string in a tuple.""" + """Create a function that will format a bytes string in a tuple.""" if alg & ns.PATH and alg & ns.IGNORECASE: return lambda x: ((x.lower(),),) elif alg & ns.PATH: @@ -248,6 +226,26 @@ def _parse_number_function(alg, sep): return (lambda x: (func(x),)) if alg & ns.PATH else func +def _parse_string_function(alg, sep, splitter, pre, post, after): + """Create a function that will properly split and format a string.""" + def func(x, not_dumb=not (alg & ns._DUMB and alg & ns.LOCALE)): + original = x + x = pre(x) # Apply pre-splitting function + if not_dumb: + original = x + x = splitter(x) # Split the string on numbers + x = py23_filter(None, x) # Remove empty strings. + x = py23_map(post, x) # Apply post-splitting function + x = _sep_inserter(x, sep) # Insert empty strings between numbers + return after(x, original) # Apply final manipulation + return func + + +def _parse_path_function(str_split): + """Create a function that will properly split and format a path.""" + return lambda x: tuple(py23_map(str_split, _path_splitter(x))) + + def _sep_inserter(iterable, sep): """Insert '' between numbers.""" @@ -329,6 +327,7 @@ def _post_string_parse_function(alg, sep): """ if alg & ns.UNGROUPLETTERS and alg & ns.LOCALE: swap = alg & ns._DUMB and alg & ns.LOWERCASEFIRST + def func(split_val, val, f=(lambda x: x.swapcase()) if swap else lambda x: x): @@ -411,7 +410,7 @@ def _path_splitter(s, _d_match=re.compile(r'\.\d').match): b_appendleft(base) # Return the split parent paths and then the split basename. - return tuple(ichain(path_parts, base_parts)) + return ichain(path_parts, base_parts) def _args_to_enum(**kwargs): diff --git a/test_natsort/slow_splitters.py b/test_natsort/slow_splitters.py index b03808e..2cb0ee9 100644 --- a/test_natsort/slow_splitters.py +++ b/test_natsort/slow_splitters.py @@ -24,7 +24,7 @@ def int_splitter(iterable, signed, sep): split_by_digits = refine_split_grouping(split_by_digits) split = int_splitter_iter(split_by_digits, signed) split = sep_inserter(split, sep) - return list(add_leading_space_if_first_is_num(split, sep)) + return tuple(add_leading_space_if_first_is_num(split, sep)) def float_splitter(iterable, signed, exp, sep): @@ -37,7 +37,7 @@ def float_splitter(iterable, signed, exp, sep): split_by_digits = peekable(refine_split_grouping(split_by_digits)) split = float_splitter_iter(split_by_digits, signed, exp) split = sep_inserter(split, sep) - return list(add_leading_space_if_first_is_num(split, sep)) + return tuple(add_leading_space_if_first_is_num(split, sep)) def refine_split_grouping(iterable): diff --git a/test_natsort/test_utils.py b/test_natsort/test_utils.py index 06a87a6..e6c21c9 100644 --- a/test_natsort/test_utils.py +++ b/test_natsort/test_utils.py @@ -26,6 +26,8 @@ from natsort.utils import ( _do_decoding, _path_splitter, chain_functions, + _parse_string_function, + _parse_path_function, _parse_number_function, _parse_bytes_function, _pre_split_function, @@ -453,9 +455,9 @@ def test_sep_inserter_inserts_separator_between_two_numbers(x): def test_path_splitter_splits_path_string_by_separator_example(): z = '/this/is/a/path' - assert _path_splitter(z) == tuple(pathlib.Path(z).parts) + assert tuple(_path_splitter(z)) == tuple(pathlib.Path(z).parts) z = pathlib.Path('/this/is/a/path') - assert _path_splitter(z) == tuple(pathlib.Path(z).parts) + assert tuple(_path_splitter(z)) == tuple(pathlib.Path(z).parts) @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') @@ -463,13 +465,13 @@ def test_path_splitter_splits_path_string_by_separator_example(): def test_path_splitter_splits_path_string_by_separator(x): assume(all(x)) z = py23_str(pathlib.Path(*x)) - assert _path_splitter(z) == tuple(pathlib.Path(z).parts) + assert tuple(_path_splitter(z)) == tuple(pathlib.Path(z).parts) def test_path_splitter_splits_path_string_by_separator_and_removes_extension_example(): z = '/this/is/a/path/file.exe' y = tuple(pathlib.Path(z).parts) - assert _path_splitter(z) == y[:-1] + (pathlib.Path(z).stem, pathlib.Path(z).suffix) + assert tuple(_path_splitter(z)) == y[:-1] + (pathlib.Path(z).stem, pathlib.Path(z).suffix) @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') @@ -478,145 +480,118 @@ def test_path_splitter_splits_path_string_by_separator_and_removes_extension(x): assume(all(x)) z = py23_str(pathlib.Path(*x[:-2])) + '.' + x[-1] y = tuple(pathlib.Path(z).parts) - assert _path_splitter(z) == y[:-1] + (pathlib.Path(z).stem, pathlib.Path(z).suffix) + assert tuple(_path_splitter(z)) == y[:-1] + (pathlib.Path(z).stem, pathlib.Path(z).suffix) -def test_number_extracter_raises_TypeError_if_given_a_number_example(): - with raises(TypeError): - assert _number_extracter(50.0, _float_sign_exp_re, *float_nolocale_nogroup) +def no_op(x): + """A function that does nothing.""" + return x -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(floats()) -def test_number_extracter_raises_TypeError_if_given_a_number(x): - with raises(TypeError): - assert _number_extracter(x, _float_sign_exp_re, *float_nolocale_nogroup) +def tuple2(x, dummy): + """Make the input a tuple.""" + return tuple(x) -def test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats_example(): - assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *float_nolocale_nogroup) == ['a', 5.0, '', 0.5034] +def test_parse_string_function_raises_TypeError_if_given_a_number_example(): + with raises(TypeError): + assert _parse_string_function(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)(50.0) @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats(x): - assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert _number_extracter(s, _float_sign_exp_re, *float_nolocale_nogroup) == float_splitter(s, True, True, '') +@given(floats()) +def test_parse_string_function_raises_TypeError_if_given_a_number(x): + with raises(TypeError): + assert _parse_string_function(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)(x) -def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats_example(): - assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *float_nolocale_nogroup) == ['a', 5.0, '+', 0.5034] +def test_parse_string_function_only_parses_digits_with_nosign_int_example(): + assert _parse_string_function(0, '', _int_nosign_re.split, no_op, fast_int, tuple2)('a5+5.034e-1') == ('a', 5, '+', 5, '.', 34, 'e-', 1) @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') @given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats(x): - assume(not any(type(y) == float and isnan(y) for y in x)) +@example([10000000000000000000000000000000000000000000000000000000000000000000000000, + 100000000000000000000000000000000000000000000000000000000000000000000000000, + 100000000000000000000000000000000000000000000000000000000000000000000000000]) +def test_parse_string_function_only_parses_digits_with_nosign_int(x): s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert _number_extracter(s, _float_nosign_exp_re, *float_nolocale_nogroup) == float_splitter(s, False, True, '') + assert _parse_string_function(0, '', _int_nosign_re.split, no_op, fast_int, tuple2)(s) == int_splitter(s, False, '') -def test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats_example(): - assert _number_extracter('a5+5.034e-1', _float_sign_noexp_re, *float_nolocale_nogroup) == ['a', 5.0, '', 5.034, 'e', -1.0] +def test_parse_string_function_parses_digit_with_sign_with_signed_int_example(): + assert _parse_string_function(0, '', _int_sign_re.split, no_op, fast_int, tuple2)('a5+5.034e-1') == ('a', 5, '', 5, '.', 34, 'e', -1) @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') @given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats(x): - assume(not any(type(y) == float and isnan(y) for y in x)) +def test_parse_string_function_parses_digit_with_sign_with_signed_int(x): s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert _number_extracter(s, _float_sign_noexp_re, *float_nolocale_nogroup) == float_splitter(s, True, False, '') + assert _parse_string_function(0, '', _int_sign_re.split, no_op, fast_int, tuple2)(s) == int_splitter(s, True, '') -def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats_example(): - assert _number_extracter('a5+5.034e-1', _float_nosign_noexp_re, *float_nolocale_nogroup) == ['a', 5.0, '+', 5.034, 'e-', 1.0] +def test_parse_string_function_only_parses_float_with_nosign_noexp_float_example(): + assert _parse_string_function(0, '', _float_nosign_noexp_re.split, no_op, fast_float, tuple2)('a5+5.034e-1') == ('a', 5.0, '+', 5.034, 'e-', 1.0) @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') @given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats(x): +def test_parse_string_function_only_parses_float_with_nosign_noexp_float(x): assume(not any(type(y) == float and isnan(y) for y in x)) s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert _number_extracter(s, _float_nosign_noexp_re, *float_nolocale_nogroup) == float_splitter(s, False, False, '') + assert _parse_string_function(0, '', _float_nosign_noexp_re.split, no_op, fast_float, tuple2)(s) == float_splitter(s, False, False, '') -def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints_example(): - assert _number_extracter('a5+5.034e-1', _int_nosign_re, *int_nolocale_nogroup) == ['a', 5, '+', 5, '.', 34, 'e-', 1] +def test_parse_string_function_only_parses_float_with_exponent_with_nosign_exp_float_example(): + assert _parse_string_function(0, '', _float_nosign_exp_re.split, no_op, fast_float, tuple2)('a5+5.034e-1') == ('a', 5.0, '+', 0.5034) @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') @given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -@example([10000000000000000000000000000000000000000000000000000000000000000000000000, - 100000000000000000000000000000000000000000000000000000000000000000000000000, - 100000000000000000000000000000000000000000000000000000000000000000000000000]) -def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints(x): - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert _number_extracter(s, _int_nosign_re, *int_nolocale_nogroup) == int_splitter(s, False, '') - - -def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints_example(): - assert _number_extracter('a5+5.034e-1', _int_sign_re, *int_nolocale_nogroup) == ['a', 5, '', 5, '.', 34, 'e', -1] - - -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints(x): +def test_parse_string_function_only_parses_float_with_exponent_with_nosign_exp_float(x): + assume(not any(type(y) == float and isnan(y) for y in x)) s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert _number_extracter(s, _int_sign_re, *int_nolocale_nogroup) == int_splitter(s, True, '') - - -def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number_example(): - assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *float_nolocale_nogroup) == ['', 6.0, 'a', 5.0, '', 0.5034] + assert _parse_string_function(0, '', _float_nosign_exp_re.split, no_op, fast_float, tuple2)(s) == float_splitter(s, False, True, '') -def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float_example(): - assert _number_extracter('A5+5.034E-1', _float_sign_exp_re, *float_nolocale_group) == ['aA', 5.0, '', 0.5034] +def test_parse_string_function_only_parses_float_with_sign_with_sign_noexp_float_example(): + assert _parse_string_function(0, '', _float_sign_noexp_re.split, no_op, fast_float, tuple2)('a5+5.034e-1') == ('a', 5.0, '', 5.034, 'e', -1.0) @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') @given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float(x): +def test_parse_string_function_only_parses_float_with_sign_with_sign_noexp_float(x): assume(not any(type(y) == float and isnan(y) for y in x)) s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - t = float_splitter(s, True, True, '') - t = [''.join([low(z) + z for z in y]) if type(y) != float else y for y in t] - assert _number_extracter(s, _float_sign_exp_re, *float_nolocale_group) == t + assert _parse_string_function(0, '', _float_sign_noexp_re.split, no_op, fast_float, tuple2)(s) == float_splitter(s, True, False, '') -def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int_example(): - assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nolocale_group) == ['aA', 5, '++', 5, '..', 34, 'eE--', 1] +def test_parse_string_function_parses_float_with_sign_exp_float_example(): + assert _parse_string_function(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)('a5+5.034e-1') == ('a', 5.0, '', 0.5034) + assert _parse_string_function(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)('6a5+5.034e-1') == ('', 6.0, 'a', 5.0, '', 0.5034) @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') @given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int(x): +def test_parse_string_function_parses_float_with_sign_exp_float(x): + assume(not any(type(y) == float and isnan(y) for y in x)) s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - t = int_splitter(s, False, '') - t = [''.join([low(z) + z for z in y]) if type(y) not in (int, long) else y for y in t] - assert _number_extracter(s, _int_nosign_re, *int_nolocale_group) == t + assert _parse_string_function(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)(s) == float_splitter(s, True, True, '') -def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale_example(): - load_locale('en_US') - strxfrm = get_strxfrm() - assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_locale_nogroup) == [strxfrm('A'), 5, strxfrm('+'), 5, strxfrm('.'), 34, strxfrm('E-'), 1] - locale.setlocale(locale.LC_NUMERIC, str('')) +def test_parse_string_function_selects_pre_function_value_if_not_dumb(): + def tuple2(x, orig): + """Make the input a tuple.""" + return (orig[0], tuple(x)) + assert _parse_string_function(0, '', _int_nosign_re.split, str.upper, fast_float, tuple2)('a5+5.034e-1') == ('A', ('A', 5, '+', 5, '.', 34, 'E-', 1)) + assert _parse_string_function(ns._DUMB, '', _int_nosign_re.split, str.upper, fast_float, tuple2)('a5+5.034e-1') == ('A', ('A', 5, '+', 5, '.', 34, 'E-', 1)) + assert _parse_string_function(ns.LOCALE, '', _int_nosign_re.split, str.upper, fast_float, tuple2)('a5+5.034e-1') == ('A', ('A', 5, '+', 5, '.', 34, 'E-', 1)) + assert _parse_string_function(ns.LOCALE | ns._DUMB, '', _int_nosign_re.split, str.upper, fast_float, tuple2)('a5+5.034e-1') == ('a', ('A', 5, '+', 5, '.', 34, 'E-', 1)) -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale(x): - load_locale('en_US') - assume(not any(any(i in bad_uni_chars for i in y) for y in x if isinstance(y, py23_str))) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - t = int_splitter(s, False, null_string) - try: # Account for locale bug on Python 3.2 - t = [y if i == 0 and y is null_string else locale_convert(y) if not isinstance(y, (float, long, int)) else y for i, y in enumerate(t)] - assert _number_extracter(s, _int_nosign_re, *int_locale_nogroup) == t - except OverflowError: - pass - locale.setlocale(locale.LC_NUMERIC, str('')) +def test_parse_path_function_parses_string_as_path_then_as_string(): + splt = _parse_string_function(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2) + assert _parse_path_function(splt)('/p/Folder (10)/file34.5nm (2).tar.gz') == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) def test__natsort_key_with_nan_input_transforms_nan_to_negative_inf(): |