summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Seth M Morton <seth.m.morton@gmail.com> 2016-05-01 16:41:31 -0700
committer: Seth M Morton <seth.m.morton@gmail.com> 2016-05-01 16:41:31 -0700
commit: d741bc05b820443e8076d2b63d64ed98ba860306 (patch)
tree: d9b45153c3a0acdbbdfd7800e280a2b9b94d688a
parent: df00419f4e1a1c4ec469b4b3ae76f98cb7c12c3a (diff)
download: natsort-d741bc05b820443e8076d2b63d64ed98ba860306.tar.gz
Added new string and path splitting function generators.
These complete the required components to make natsort more functional.
-rw-r--r-- natsort/utils.py 71
-rw-r--r-- test_natsort/slow_splitters.py 4
-rw-r--r-- test_natsort/test_utils.py 147
3 files changed, 98 insertions, 124 deletions
diff --git a/natsort/utils.py b/natsort/utils.py
index 6c2362c..6bffdf9 100644
--- a/natsort/utils.py
+++ b/natsort/utils.py
@@ -146,39 +146,21 @@ def _natsort_key(val, key, alg):
if key is not None:
val = key(val)
- # If this is a path, convert it.
- # An AttrubuteError is raised if not a string.
- split_as_path = False
- if alg & ns.PATH:
- try:
- val = _path_splitter(val)
- except AttributeError:
- pass
- else:
- # Record that this string was split as a path so that
- # we don't set PATH in the recursive call.
- split_as_path = True
-
# Assume the input are strings, which is the most common case.
- # Apply the string modification if needed.
- orig_val = val
try:
if use_locale and dumb_sort():
alg |= ns._DUMB
- lowfirst = alg & ns.LOWERCASEFIRST
- dumb = alg & ns._DUMB
- val = _pre_split_function(alg)(val)
- gl = alg & ns.GROUPLETTERS
- ret = tuple(_number_extracter(val,
- regex,
- num_function,
- use_locale,
- gl or (use_locale and dumb)))
- # Handle NaN.
- if any(x != x for x in ret):
- ret = _fix_nan(ret, alg)
- val = orig_val if (alg & ns._DUMB) else val
- return _post_string_parse_function(alg, null_string)(ret, val)
+ split = _parse_string_function(
+ alg,
+ null_string if use_locale else '',
+ regex.split,
+ _pre_split_function(alg),
+ _post_split_function(alg),
+ _post_string_parse_function(alg, null_string)
+ )
+ if alg & ns.PATH:
+ split = _parse_path_function(split)
+ return split(val)
except (TypeError, AttributeError):
# Check if it is a bytes type, and if so return as a
# one element tuple.
@@ -186,12 +168,8 @@ def _natsort_key(val, key, alg):
return _parse_bytes_function(alg)(val)
# If not strings, assume it is an iterable that must
# be parsed recursively. Do not apply the key recursively.
- # If this string was split as a path, turn off 'PATH'.
try:
- was_path = alg & ns.PATH
- newalg = alg & ns._ALL_BUT_PATH
- newalg |= (was_path * (not split_as_path))
- return tuple([_natsort_key(x, None, newalg) for x in val])
+ return tuple([_natsort_key(x, None, alg) for x in val])
# If there is still an error, it must be a number.
# Return as-is, with a leading empty string.
except TypeError:
@@ -225,7 +203,7 @@ def _number_extracter(s, regex, numconv, use_locale, group_letters):
def _parse_bytes_function(alg):
- """Create a function that will properly format a bytes string in a tuple."""
+ """Create a function that will format a bytes string in a tuple."""
if alg & ns.PATH and alg & ns.IGNORECASE:
return lambda x: ((x.lower(),),)
elif alg & ns.PATH:
@@ -248,6 +226,26 @@ def _parse_number_function(alg, sep):
return (lambda x: (func(x),)) if alg & ns.PATH else func
+def _parse_string_function(alg, sep, splitter, pre, post, after):
+ """Create a function that will properly split and format a string."""
+ def func(x, not_dumb=not (alg & ns._DUMB and alg & ns.LOCALE)):
+ original = x
+ x = pre(x) # Apply pre-splitting function
+ if not_dumb:
+ original = x
+ x = splitter(x) # Split the string on numbers
+ x = py23_filter(None, x) # Remove empty strings.
+ x = py23_map(post, x) # Apply post-splitting function
+ x = _sep_inserter(x, sep) # Insert empty strings between numbers
+ return after(x, original) # Apply final manipulation
+ return func
+
+
+def _parse_path_function(str_split):
+ """Create a function that will properly split and format a path."""
+ return lambda x: tuple(py23_map(str_split, _path_splitter(x)))
+
+
def _sep_inserter(iterable, sep):
"""Insert '' between numbers."""
@@ -329,6 +327,7 @@ def _post_string_parse_function(alg, sep):
"""
if alg & ns.UNGROUPLETTERS and alg & ns.LOCALE:
swap = alg & ns._DUMB and alg & ns.LOWERCASEFIRST
+
def func(split_val,
val,
f=(lambda x: x.swapcase()) if swap else lambda x: x):
@@ -411,7 +410,7 @@ def _path_splitter(s, _d_match=re.compile(r'\.\d').match):
b_appendleft(base)
# Return the split parent paths and then the split basename.
- return tuple(ichain(path_parts, base_parts))
+ return ichain(path_parts, base_parts)
def _args_to_enum(**kwargs):
diff --git a/test_natsort/slow_splitters.py b/test_natsort/slow_splitters.py
index b03808e..2cb0ee9 100644
--- a/test_natsort/slow_splitters.py
+++ b/test_natsort/slow_splitters.py
@@ -24,7 +24,7 @@ def int_splitter(iterable, signed, sep):
split_by_digits = refine_split_grouping(split_by_digits)
split = int_splitter_iter(split_by_digits, signed)
split = sep_inserter(split, sep)
- return list(add_leading_space_if_first_is_num(split, sep))
+ return tuple(add_leading_space_if_first_is_num(split, sep))
def float_splitter(iterable, signed, exp, sep):
@@ -37,7 +37,7 @@ def float_splitter(iterable, signed, exp, sep):
split_by_digits = peekable(refine_split_grouping(split_by_digits))
split = float_splitter_iter(split_by_digits, signed, exp)
split = sep_inserter(split, sep)
- return list(add_leading_space_if_first_is_num(split, sep))
+ return tuple(add_leading_space_if_first_is_num(split, sep))
def refine_split_grouping(iterable):
diff --git a/test_natsort/test_utils.py b/test_natsort/test_utils.py
index 06a87a6..e6c21c9 100644
--- a/test_natsort/test_utils.py
+++ b/test_natsort/test_utils.py
@@ -26,6 +26,8 @@ from natsort.utils import (
_do_decoding,
_path_splitter,
chain_functions,
+ _parse_string_function,
+ _parse_path_function,
_parse_number_function,
_parse_bytes_function,
_pre_split_function,
@@ -453,9 +455,9 @@ def test_sep_inserter_inserts_separator_between_two_numbers(x):
def test_path_splitter_splits_path_string_by_separator_example():
z = '/this/is/a/path'
- assert _path_splitter(z) == tuple(pathlib.Path(z).parts)
+ assert tuple(_path_splitter(z)) == tuple(pathlib.Path(z).parts)
z = pathlib.Path('/this/is/a/path')
- assert _path_splitter(z) == tuple(pathlib.Path(z).parts)
+ assert tuple(_path_splitter(z)) == tuple(pathlib.Path(z).parts)
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
@@ -463,13 +465,13 @@ def test_path_splitter_splits_path_string_by_separator_example():
def test_path_splitter_splits_path_string_by_separator(x):
assume(all(x))
z = py23_str(pathlib.Path(*x))
- assert _path_splitter(z) == tuple(pathlib.Path(z).parts)
+ assert tuple(_path_splitter(z)) == tuple(pathlib.Path(z).parts)
def test_path_splitter_splits_path_string_by_separator_and_removes_extension_example():
z = '/this/is/a/path/file.exe'
y = tuple(pathlib.Path(z).parts)
- assert _path_splitter(z) == y[:-1] + (pathlib.Path(z).stem, pathlib.Path(z).suffix)
+ assert tuple(_path_splitter(z)) == y[:-1] + (pathlib.Path(z).stem, pathlib.Path(z).suffix)
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
@@ -478,145 +480,118 @@ def test_path_splitter_splits_path_string_by_separator_and_removes_extension(x):
assume(all(x))
z = py23_str(pathlib.Path(*x[:-2])) + '.' + x[-1]
y = tuple(pathlib.Path(z).parts)
- assert _path_splitter(z) == y[:-1] + (pathlib.Path(z).stem, pathlib.Path(z).suffix)
+ assert tuple(_path_splitter(z)) == y[:-1] + (pathlib.Path(z).stem, pathlib.Path(z).suffix)
-def test_number_extracter_raises_TypeError_if_given_a_number_example():
- with raises(TypeError):
- assert _number_extracter(50.0, _float_sign_exp_re, *float_nolocale_nogroup)
+def no_op(x):
+ """A function that does nothing."""
+ return x
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(floats())
-def test_number_extracter_raises_TypeError_if_given_a_number(x):
- with raises(TypeError):
- assert _number_extracter(x, _float_sign_exp_re, *float_nolocale_nogroup)
+def tuple2(x, dummy):
+ """Make the input a tuple."""
+ return tuple(x)
-def test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats_example():
- assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *float_nolocale_nogroup) == ['a', 5.0, '', 0.5034]
+def test_parse_string_function_raises_TypeError_if_given_a_number_example():
+ with raises(TypeError):
+ assert _parse_string_function(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)(50.0)
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert _number_extracter(s, _float_sign_exp_re, *float_nolocale_nogroup) == float_splitter(s, True, True, '')
+@given(floats())
+def test_parse_string_function_raises_TypeError_if_given_a_number(x):
+ with raises(TypeError):
+ assert _parse_string_function(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)(x)
-def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats_example():
- assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *float_nolocale_nogroup) == ['a', 5.0, '+', 0.5034]
+def test_parse_string_function_only_parses_digits_with_nosign_int_example():
+ assert _parse_string_function(0, '', _int_nosign_re.split, no_op, fast_int, tuple2)('a5+5.034e-1') == ('a', 5, '+', 5, '.', 34, 'e-', 1)
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
+@example([10000000000000000000000000000000000000000000000000000000000000000000000000,
+ 100000000000000000000000000000000000000000000000000000000000000000000000000,
+ 100000000000000000000000000000000000000000000000000000000000000000000000000])
+def test_parse_string_function_only_parses_digits_with_nosign_int(x):
s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert _number_extracter(s, _float_nosign_exp_re, *float_nolocale_nogroup) == float_splitter(s, False, True, '')
+ assert _parse_string_function(0, '', _int_nosign_re.split, no_op, fast_int, tuple2)(s) == int_splitter(s, False, '')
-def test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats_example():
- assert _number_extracter('a5+5.034e-1', _float_sign_noexp_re, *float_nolocale_nogroup) == ['a', 5.0, '', 5.034, 'e', -1.0]
+def test_parse_string_function_parses_digit_with_sign_with_signed_int_example():
+ assert _parse_string_function(0, '', _int_sign_re.split, no_op, fast_int, tuple2)('a5+5.034e-1') == ('a', 5, '', 5, '.', 34, 'e', -1)
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
+def test_parse_string_function_parses_digit_with_sign_with_signed_int(x):
s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert _number_extracter(s, _float_sign_noexp_re, *float_nolocale_nogroup) == float_splitter(s, True, False, '')
+ assert _parse_string_function(0, '', _int_sign_re.split, no_op, fast_int, tuple2)(s) == int_splitter(s, True, '')
-def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats_example():
- assert _number_extracter('a5+5.034e-1', _float_nosign_noexp_re, *float_nolocale_nogroup) == ['a', 5.0, '+', 5.034, 'e-', 1.0]
+def test_parse_string_function_only_parses_float_with_nosign_noexp_float_example():
+ assert _parse_string_function(0, '', _float_nosign_noexp_re.split, no_op, fast_float, tuple2)('a5+5.034e-1') == ('a', 5.0, '+', 5.034, 'e-', 1.0)
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats(x):
+def test_parse_string_function_only_parses_float_with_nosign_noexp_float(x):
assume(not any(type(y) == float and isnan(y) for y in x))
s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert _number_extracter(s, _float_nosign_noexp_re, *float_nolocale_nogroup) == float_splitter(s, False, False, '')
+ assert _parse_string_function(0, '', _float_nosign_noexp_re.split, no_op, fast_float, tuple2)(s) == float_splitter(s, False, False, '')
-def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints_example():
- assert _number_extracter('a5+5.034e-1', _int_nosign_re, *int_nolocale_nogroup) == ['a', 5, '+', 5, '.', 34, 'e-', 1]
+def test_parse_string_function_only_parses_float_with_exponent_with_nosign_exp_float_example():
+ assert _parse_string_function(0, '', _float_nosign_exp_re.split, no_op, fast_float, tuple2)('a5+5.034e-1') == ('a', 5.0, '+', 0.5034)
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-@example([10000000000000000000000000000000000000000000000000000000000000000000000000,
- 100000000000000000000000000000000000000000000000000000000000000000000000000,
- 100000000000000000000000000000000000000000000000000000000000000000000000000])
-def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints(x):
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert _number_extracter(s, _int_nosign_re, *int_nolocale_nogroup) == int_splitter(s, False, '')
-
-
-def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints_example():
- assert _number_extracter('a5+5.034e-1', _int_sign_re, *int_nolocale_nogroup) == ['a', 5, '', 5, '.', 34, 'e', -1]
-
-
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints(x):
+def test_parse_string_function_only_parses_float_with_exponent_with_nosign_exp_float(x):
+ assume(not any(type(y) == float and isnan(y) for y in x))
s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert _number_extracter(s, _int_sign_re, *int_nolocale_nogroup) == int_splitter(s, True, '')
-
-
-def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number_example():
- assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *float_nolocale_nogroup) == ['', 6.0, 'a', 5.0, '', 0.5034]
+ assert _parse_string_function(0, '', _float_nosign_exp_re.split, no_op, fast_float, tuple2)(s) == float_splitter(s, False, True, '')
-def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float_example():
- assert _number_extracter('A5+5.034E-1', _float_sign_exp_re, *float_nolocale_group) == ['aA', 5.0, '', 0.5034]
+def test_parse_string_function_only_parses_float_with_sign_with_sign_noexp_float_example():
+ assert _parse_string_function(0, '', _float_sign_noexp_re.split, no_op, fast_float, tuple2)('a5+5.034e-1') == ('a', 5.0, '', 5.034, 'e', -1.0)
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float(x):
+def test_parse_string_function_only_parses_float_with_sign_with_sign_noexp_float(x):
assume(not any(type(y) == float and isnan(y) for y in x))
s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- t = float_splitter(s, True, True, '')
- t = [''.join([low(z) + z for z in y]) if type(y) != float else y for y in t]
- assert _number_extracter(s, _float_sign_exp_re, *float_nolocale_group) == t
+ assert _parse_string_function(0, '', _float_sign_noexp_re.split, no_op, fast_float, tuple2)(s) == float_splitter(s, True, False, '')
-def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int_example():
- assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nolocale_group) == ['aA', 5, '++', 5, '..', 34, 'eE--', 1]
+def test_parse_string_function_parses_float_with_sign_exp_float_example():
+ assert _parse_string_function(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)('a5+5.034e-1') == ('a', 5.0, '', 0.5034)
+ assert _parse_string_function(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)('6a5+5.034e-1') == ('', 6.0, 'a', 5.0, '', 0.5034)
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int(x):
+def test_parse_string_function_parses_float_with_sign_exp_float(x):
+ assume(not any(type(y) == float and isnan(y) for y in x))
s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- t = int_splitter(s, False, '')
- t = [''.join([low(z) + z for z in y]) if type(y) not in (int, long) else y for y in t]
- assert _number_extracter(s, _int_nosign_re, *int_nolocale_group) == t
+ assert _parse_string_function(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)(s) == float_splitter(s, True, True, '')
-def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale_example():
- load_locale('en_US')
- strxfrm = get_strxfrm()
- assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_locale_nogroup) == [strxfrm('A'), 5, strxfrm('+'), 5, strxfrm('.'), 34, strxfrm('E-'), 1]
- locale.setlocale(locale.LC_NUMERIC, str(''))
+def test_parse_string_function_selects_pre_function_value_if_not_dumb():
+ def tuple2(x, orig):
+ """Make the input a tuple."""
+ return (orig[0], tuple(x))
+ assert _parse_string_function(0, '', _int_nosign_re.split, str.upper, fast_float, tuple2)('a5+5.034e-1') == ('A', ('A', 5, '+', 5, '.', 34, 'E-', 1))
+ assert _parse_string_function(ns._DUMB, '', _int_nosign_re.split, str.upper, fast_float, tuple2)('a5+5.034e-1') == ('A', ('A', 5, '+', 5, '.', 34, 'E-', 1))
+ assert _parse_string_function(ns.LOCALE, '', _int_nosign_re.split, str.upper, fast_float, tuple2)('a5+5.034e-1') == ('A', ('A', 5, '+', 5, '.', 34, 'E-', 1))
+ assert _parse_string_function(ns.LOCALE | ns._DUMB, '', _int_nosign_re.split, str.upper, fast_float, tuple2)('a5+5.034e-1') == ('a', ('A', 5, '+', 5, '.', 34, 'E-', 1))
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale(x):
- load_locale('en_US')
- assume(not any(any(i in bad_uni_chars for i in y) for y in x if isinstance(y, py23_str)))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- t = int_splitter(s, False, null_string)
- try: # Account for locale bug on Python 3.2
- t = [y if i == 0 and y is null_string else locale_convert(y) if not isinstance(y, (float, long, int)) else y for i, y in enumerate(t)]
- assert _number_extracter(s, _int_nosign_re, *int_locale_nogroup) == t
- except OverflowError:
- pass
- locale.setlocale(locale.LC_NUMERIC, str(''))
+def test_parse_path_function_parses_string_as_path_then_as_string():
+ splt = _parse_string_function(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)
+ assert _parse_path_function(splt)('/p/Folder (10)/file34.5nm (2).tar.gz') == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',))
def test__natsort_key_with_nan_input_transforms_nan_to_negative_inf():