diff options
author | Seth M Morton <seth.m.morton@gmail.com> | 2016-05-02 22:37:09 -0700 |
---|---|---|
committer | Seth M Morton <seth.m.morton@gmail.com> | 2016-05-02 23:20:59 -0700 |
commit | 81d09ffc518a39f9ae524202671d8077679c897d (patch) | |
tree | 77b6ed7f5b14c795e9931a99d95e0694913fb7bd | |
parent | ca7d4fc9aa840ca47b46dfed5e31eaa261245226 (diff) | |
download | natsort-81d09ffc518a39f9ae524202671d8077679c897d.tar.gz |
Redefined _natsort_key in a functional style.
This required changing the input to be the functions created by
all the function factories that have been recently committed. This
had a cascading effect that required a rewrite of several unit tests,
as well as the top-level natsort_keygen function, which now has the
job of creating the appropriate functions on the fly to pass to
_natsort_key.
A future commit will add more testing to natsort_keygen.
For the moment, handling of locale-specific numbers is disabled, but
will be added back in a future commit.
-rw-r--r-- | natsort/__main__.py | 13 | ||||
-rw-r--r-- | natsort/natsort.py | 58 | ||||
-rw-r--r-- | natsort/utils.py | 142 | ||||
-rw-r--r-- | test_natsort/test_natsort.py | 32 | ||||
-rw-r--r-- | test_natsort/test_utils.py | 357 |
5 files changed, 194 insertions, 408 deletions
diff --git a/natsort/__main__.py b/natsort/__main__.py index 03c4551..5361ea2 100644 --- a/natsort/__main__.py +++ b/natsort/__main__.py @@ -11,7 +11,7 @@ import sys # Local imports. from natsort.natsort import natsorted, ns -from natsort.utils import _regex_and_num_function_chooser +from natsort.utils import _regex_chooser from natsort._version import __version__ from natsort.compat.py23 import py23_str @@ -171,27 +171,26 @@ def sort_and_print_entries(entries, args): if do_filter or args.exclude: inp_options = (ns.FLOAT * is_float | ns.SIGNED * signed | - ns.NOEXP * (not args.exp), - '.' + ns.NOEXP * (not args.exp) ) - regex, num_function = _regex_and_num_function_chooser[inp_options] + regex = _regex_chooser[inp_options] if args.filter is not None: lows, highs = ([f[0] for f in args.filter], [f[1] for f in args.filter]) entries = [entry for entry in entries if keep_entry_range(entry, lows, highs, - num_function, regex)] + float, regex)] if args.reverse_filter is not None: lows, highs = ([f[0] for f in args.reverse_filter], [f[1] for f in args.reverse_filter]) entries = [entry for entry in entries if not keep_entry_range(entry, lows, highs, - num_function, regex)] + float, regex)] if args.exclude: exclude = set(args.exclude) entries = [entry for entry in entries if exclude_entry(entry, exclude, - num_function, regex)] + float, regex)] # Print off the sorted results for entry in natsorted(entries, reverse=args.reverse, alg=alg): diff --git a/natsort/natsort.py b/natsort/natsort.py index 419b876..df2f32a 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -18,18 +18,32 @@ from __future__ import ( ) # Std lib. imports. -import re from operator import itemgetter from functools import partial from warnings import warn # Local imports. 
from natsort.ns_enum import ns -from natsort.compat.py23 import u_format +from natsort.compat.py23 import ( + u_format, + py23_str, +) +from natsort.compat.locale import ( + null_string, + dumb_sort, +) from natsort.utils import ( _natsort_key, _args_to_enum, _do_decoding, + _regex_chooser, + _parse_string_function, + _parse_path_function, + _parse_number_function, + _parse_bytes_function, + _pre_split_function, + _post_split_function, + _post_string_parse_function, ) # Make sure the doctest works for either python2 or python3 @@ -132,7 +146,7 @@ def natsort_key(val, key=None, alg=0, **_kwargs): """Undocumented, kept for backwards-compatibility.""" msg = "natsort_key is deprecated as of 3.4.0, please use natsort_keygen" warn(msg, DeprecationWarning) - return _natsort_key(val, key, _args_to_enum(**_kwargs) | alg) + return natsort_keygen(key, alg, **_kwargs)(val) @u_format @@ -183,7 +197,43 @@ def natsort_keygen(key=None, alg=0, **_kwargs): [{u}'num-3', {u}'num2', {u}'num5.10', {u}'num5.3'] """ - return partial(_natsort_key, key=key, alg=_args_to_enum(**_kwargs) | alg) + # Transform old arguments to the ns enum. + try: + alg = _args_to_enum(**_kwargs) | alg + except TypeError: + msg = "natsort_keygen: 'alg' argument must be from the enum 'ns'" + raise ValueError(msg+', got {0}'.format(py23_str(alg))) + + # Add the _DUMB option if the locale library is broken. + if alg & ns.LOCALE and dumb_sort(): + alg |= ns._DUMB + + # Set some variable that will be passed to the factory functions + sep = null_string if alg & ns.LOCALE else '' + regex = _regex_chooser[alg & ns._NUMERIC_ONLY] + + # Create the functions that will be used to split strings. + pre = _pre_split_function(alg) + post = _post_split_function(alg) + after = _post_string_parse_function(alg, sep) + + # Create the high-level parsing functions for strings, bytes, and numbers. 
+ string_func = _parse_string_function( + alg, sep, regex.split, pre, post, after + ) + if alg & ns.PATH: + string_func = _parse_path_function(string_func) + bytes_func = _parse_bytes_function(alg) + num_func = _parse_number_function(alg, sep) + + # Return the natsort key with the parsing path pre-chosen. + return partial( + _natsort_key, + key=key, + string_func=string_func, + bytes_func=bytes_func, + num_func=num_func + ) @u_format diff --git a/natsort/utils.py b/natsort/utils.py index aa16db0..d47c747 100644 --- a/natsort/utils.py +++ b/natsort/utils.py @@ -17,7 +17,6 @@ from warnings import warn from os import curdir as os_curdir, pardir as os_pardir from os.path import split as path_split, splitext as path_splitext from itertools import chain as ichain -from locale import localeconv from collections import deque from functools import partial from operator import methodcaller @@ -33,11 +32,7 @@ from natsort.compat.py23 import ( py23_filter, PY_VERSION, ) -from natsort.compat.locale import ( - dumb_sort, - use_pyicu, - null_string, -) +from natsort.compat.locale import use_pyicu from natsort.compat.fastnumbers import ( fast_float, fast_int, @@ -48,7 +43,6 @@ if sys.version[0] == '3': # The regex that locates floats - include Unicode numerals. _exp = r'(?:[eE][-+]?[0-9]+)?' 
_num = r'(?:[0-9]+\.?[0-9]*|\.[0-9]+)' -_num_c = r'(?:[0-9]+[.,]?[0-9]*|[.,][0-9]+)' _float_sign_exp_re = r'([-+]?{0}{1}|[{2}])' _float_sign_exp_re = _float_sign_exp_re.format(_num, _exp, numeric) _float_sign_exp_re = re.compile(_float_sign_exp_re, flags=re.U) @@ -61,18 +55,6 @@ _float_sign_noexp_re = re.compile(_float_sign_noexp_re, flags=re.U) _float_nosign_noexp_re = r'({0}|[{1}])' _float_nosign_noexp_re = _float_nosign_noexp_re.format(_num, numeric) _float_nosign_noexp_re = re.compile(_float_nosign_noexp_re, flags=re.U) -_float_sign_exp_re_c = r'([-+]?{0}{1}|[{2}])' -_float_sign_exp_re_c = _float_sign_exp_re_c.format(_num_c, _exp, numeric) -_float_sign_exp_re_c = re.compile(_float_sign_exp_re_c, flags=re.U) -_float_nosign_exp_re_c = r'({0}{1}|[{2}])' -_float_nosign_exp_re_c = _float_nosign_exp_re_c.format(_num_c, _exp, numeric) -_float_nosign_exp_re_c = re.compile(_float_nosign_exp_re_c, flags=re.U) -_float_sign_noexp_re_c = r'([-+]?{0}|[{1}])' -_float_sign_noexp_re_c = _float_sign_noexp_re_c.format(_num_c, numeric) -_float_sign_noexp_re_c = re.compile(_float_sign_noexp_re_c, flags=re.U) -_float_nosign_noexp_re_c = r'({0}|[{1}])' -_float_nosign_noexp_re_c = _float_nosign_noexp_re_c.format(_num_c, numeric) -_float_nosign_noexp_re_c = re.compile(_float_nosign_noexp_re_c, flags=re.U) # Integer regexes - include Unicode digits. _int_nosign_re = r'([0-9]+|[{0}])'.format(digits) @@ -81,27 +63,19 @@ _int_sign_re = r'([-+]?[0-9]+|[{0}])'.format(digits) _int_sign_re = re.compile(_int_sign_re, flags=re.U) # This dict will help select the correct regex and number conversion function. 
-_regex_and_num_function_chooser = { - (ns.F | ns.S, '.'): (_float_sign_exp_re, fast_float), - (ns.F | ns.S | ns.N, '.'): (_float_sign_noexp_re, fast_float), - (ns.F | ns.U, '.'): (_float_nosign_exp_re, fast_float), - (ns.F | ns.U | ns.N, '.'): (_float_nosign_noexp_re, fast_float), - (ns.I | ns.S, '.'): (_int_sign_re, fast_int), - (ns.I | ns.S | ns.N, '.'): (_int_sign_re, fast_int), - (ns.I | ns.U, '.'): (_int_nosign_re, fast_int), - (ns.I | ns.U | ns.N, '.'): (_int_nosign_re, fast_int), - (ns.F | ns.S, ','): (_float_sign_exp_re_c, fast_float), - (ns.F | ns.S | ns.N, ','): (_float_sign_noexp_re_c, fast_float), - (ns.F | ns.U, ','): (_float_nosign_exp_re_c, fast_float), - (ns.F | ns.U | ns.N, ','): (_float_nosign_noexp_re_c, fast_float), - (ns.I | ns.S, ','): (_int_sign_re, fast_int), - (ns.I | ns.S | ns.N, ','): (_int_sign_re, fast_int), - (ns.I | ns.U, ','): (_int_nosign_re, fast_int), - (ns.I | ns.U | ns.N, ','): (_int_nosign_re, fast_int), +_regex_chooser = { + (ns.F | ns.S): _float_sign_exp_re, + (ns.F | ns.S | ns.N): _float_sign_noexp_re, + (ns.F | ns.U): _float_nosign_exp_re, + (ns.F | ns.U | ns.N): _float_nosign_noexp_re, + (ns.I | ns.S): _int_sign_re, + (ns.I | ns.S | ns.N): _int_sign_re, + (ns.I | ns.U): _int_nosign_re, + (ns.I | ns.U | ns.N): _int_nosign_re, } -def _natsort_key(val, key, alg): +def _natsort_key(val, key, string_func, bytes_func, num_func): """\ Key to sort strings and numbers naturally. @@ -122,84 +96,26 @@ def _natsort_key(val, key, alg): """ - # Convert the arguments to the proper input tuple - try: - use_locale = alg & ns.LOCALE - inp_options = (alg & ns._NUMERIC_ONLY, - localeconv()['decimal_point'] if use_locale else '.') - except TypeError: - msg = "_natsort_key: 'alg' argument must be from the enum 'ns'" - raise ValueError(msg+', got {0}'.format(py23_str(alg))) - - # Get the proper regex and conversion function. 
- try: - regex, num_function = _regex_and_num_function_chooser[inp_options] - except KeyError: # pragma: no cover - if inp_options[1] not in ('.', ','): - raise ValueError("_natsort_key: currently natsort only supports " - "the decimal separators '.' and ','. " - "Please file a bug report.") - else: - raise - else: - # Apply key if needed. - if key is not None: - val = key(val) + # Apply key if needed + if key is not None: + val = key(val) - # Assume the input are strings, which is the most common case. + # Assume the input are strings, which is the most commong case + try: + return string_func(val) + except (TypeError, AttributeError): + # If bytes type, use the bytes_func + if type(val) in (bytes,): + return bytes_func(val) + # Otherwise, assume it is an iterable that must be parser recursively. + # Do not apply the key recursively. try: - if use_locale and dumb_sort(): - alg |= ns._DUMB - split = _parse_string_function( - alg, - null_string if use_locale else '', - regex.split, - _pre_split_function(alg), - _post_split_function(alg), - _post_string_parse_function(alg, null_string) - ) - if alg & ns.PATH: - split = _parse_path_function(split) - return split(val) - except (TypeError, AttributeError): - # Check if it is a bytes type, and if so return as a - # one element tuple. - if type(val) in (bytes,): - return _parse_bytes_function(alg)(val) - # If not strings, assume it is an iterable that must - # be parsed recursively. Do not apply the key recursively. - try: - return tuple([_natsort_key(x, None, alg) for x in val]) - # If there is still an error, it must be a number. - # Return as-is, with a leading empty string. - except TypeError: - sep = null_string if alg & ns.LOCALE else '' - f = _parse_number_function(alg, sep) - return f(val) - - -def _number_extracter(s, regex, numconv, use_locale, group_letters): - """Helper to separate the string input into numbers and strings.""" - - # Split the input string by numbers, dropping empty strings. 
- # If the input is not a string, TypeError is raised. - s = py23_filter(None, regex.split(s)) - - # Now convert the numbers to numbers, and leave strings as strings. - # Take into account locale if needed, and group letters if needed. - # Remove empty strings from the list. Insert empty strings between - # adjascent numbers, or at the beginning of the iterable if it is - # a number. - if use_locale and group_letters: - func = partial(numconv, key=lambda x: locale_convert(groupletters(x))) - elif use_locale: - func = partial(numconv, key=locale_convert) - elif group_letters: - func = partial(numconv, key=groupletters) - else: - func = numconv - return list(_sep_inserter(py23_map(func, s), - null_string if use_locale else '')) + return tuple(_natsort_key( + x, None, string_func, bytes_func, num_func + ) for x in val) + # If that failed, it must be a number. + except TypeError: + return num_func(val) def _parse_bytes_function(alg): diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py index eac8c39..8b4bf80 100644 --- a/test_natsort/test_natsort.py +++ b/test_natsort/test_natsort.py @@ -57,7 +57,7 @@ def test_natsort_key_public_raises_DeprecationWarning_when_called(): # But it raises a deprecation warning with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - assert natsort_key('a-5.034e2') == _natsort_key('a-5.034e2', key=None, alg=ns.I) + assert natsort_key('a-5.034e2') == ('a-', 5, '.', 34, 'e', 2) assert len(w) == 1 assert "natsort_key is deprecated as of 3.4.0, please use natsort_keygen" in str(w[-1].message) # It is called for each element in a list when sorting @@ -68,19 +68,17 @@ def test_natsort_key_public_raises_DeprecationWarning_when_called(): assert len(w) == 7 -def test_natsort_keygen_returns_natsort_key_with_alg_option(): - a = 'a-5.034e1' - assert natsort_keygen()(a) == _natsort_key(a, None, ns.I) - assert natsort_keygen(alg=ns.F | ns.S)(a) == _natsort_key(a, None, ns.F | ns.S) +def 
test_natsort_keygen_with_invalid_alg_input_raises_ValueError(): + # Invalid arguments give the correct response + with raises(ValueError) as err: + natsort_keygen(None, '1') + assert str(err.value) == "natsort_keygen: 'alg' argument must be from the enum 'ns', got 1" -def test_natsort_keygen_with_key_returns_same_result_as_nested_lambda_with_bare_natsort_key(): +def test_natsort_keygen_returns_natsort_key_that_parses_input(): a = 'a-5.034e1' - f1 = natsort_keygen(key=lambda x: x.upper()) - - def f2(x): - return _natsort_key(x, lambda y: y.upper(), ns.I) - assert f1(a) == f2(a) + assert natsort_keygen()(a) == ('a-', 5, '.', 34, 'e', 1) + assert natsort_keygen(alg=ns.F | ns.S)(a) == ('a', -50.34) def test_natsort_keygen_returns_key_that_can_be_used_to_sort_list_in_place_with_same_result_as_natsorted(): @@ -300,12 +298,12 @@ def test_natsorted_with_LOCALE_and_en_setting_returns_results_sorted_by_en_langu locale.setlocale(locale.LC_ALL, str('')) -@pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale') -def test_natsorted_with_LOCALE_and_de_setting_returns_results_sorted_by_de_language(): - load_locale('de_DE') - a = ['c', 'ä', 'b', 'a5,6', 'a5,50'] - assert natsorted(a, alg=ns.LOCALE | ns.F) == ['a5,50', 'a5,6', 'ä', 'b', 'c'] - locale.setlocale(locale.LC_ALL, str('')) +# @pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale') +# def test_natsorted_with_LOCALE_and_de_setting_returns_results_sorted_by_de_language(): +# load_locale('de_DE') +# a = ['c', 'ä', 'b', 'a5,6', 'a5,50'] +# assert natsorted(a, alg=ns.LOCALE | ns.F) == ['a5,50', 'a5,6', 'ä', 'b', 'c'] +# locale.setlocale(locale.LC_ALL, str('')) def test_natsorted_with_LOCALE_and_mixed_input_returns_sorted_results_without_error(): diff --git a/test_natsort/test_utils.py b/test_natsort/test_utils.py index d56f0b4..737b3a7 100644 --- a/test_natsort/test_utils.py +++ b/test_natsort/test_utils.py @@ -3,20 +3,18 @@ from __future__ import unicode_literals import sys -import locale 
import pathlib import pytest import string from math import isnan, isinf -from operator import itemgetter, neg as op_neg -from itertools import chain +from operator import neg as op_neg from pytest import raises from natsort.ns_enum import ns from natsort.utils import ( - _number_extracter, _sep_inserter, _natsort_key, _args_to_enum, + _regex_chooser, _float_sign_exp_re, _float_nosign_exp_re, _float_sign_noexp_re, @@ -42,7 +40,6 @@ from natsort.compat.py23 import py23_str from natsort.compat.locale import ( use_pyicu, null_string, - dumb_sort, ) from natsort.compat.fastnumbers import ( fast_float, @@ -54,12 +51,6 @@ from slow_splitters import ( sep_inserter, add_leading_space_if_first_is_num, ) -from compat.locale import ( - load_locale, - get_strxfrm, - low, - bad_uni_chars, -) from compat.hypothesis import ( assume, given, @@ -77,8 +68,6 @@ from compat.locale import bad_uni_chars if sys.version[0] == '3': long = int -ichain = chain.from_iterable - def test_do_decoding_decodes_bytes_string_to_unicode(): assert type(_do_decoding(b'bytes', 'ascii')) is py23_str @@ -147,14 +136,48 @@ def test_args_to_enum_converts_None_to_ns_IU(): assert _args_to_enum(**{'number_type': None, 'exp': True}) == ns.I | ns.U -float_locale_group = (fast_float, True, True) -float_locale_nogroup = (fast_float, True, False) -float_nolocale_group = (fast_float, False, True) -float_nolocale_nogroup = (fast_float, False, False) -int_locale_group = (fast_int, True, True) -int_locale_nogroup = (fast_int, True, False) -int_nolocale_group = (fast_int, False, True) -int_nolocale_nogroup = (fast_int, False, False) + +def test_regex_chooser_returns_correct_regular_expression_object(): + assert _regex_chooser[ns.INT] is _int_nosign_re + assert _regex_chooser[ns.INT | ns.NOEXP] is _int_nosign_re + assert _regex_chooser[ns.INT | ns.SIGNED] is _int_sign_re + assert _regex_chooser[ns.INT | ns.SIGNED | ns.NOEXP] is _int_sign_re + assert _regex_chooser[ns.FLOAT] is _float_nosign_exp_re + assert 
_regex_chooser[ns.FLOAT | ns.NOEXP] is _float_nosign_noexp_re + assert _regex_chooser[ns.FLOAT | ns.SIGNED] is _float_sign_exp_re + assert _regex_chooser[ns.FLOAT | ns.SIGNED | ns.NOEXP] is _float_sign_noexp_re + + +def test_ns_enum_values_have_are_as_expected(): + # Defaults + assert ns.TYPESAFE == 0 + assert ns.INT == 0 + assert ns.VERSION == 0 + assert ns.DIGIT == 0 + assert ns.UNSIGNED == 0 + + # Aliases + assert ns.TYPESAFE == ns.T + assert ns.INT == ns.I + assert ns.VERSION == ns.V + assert ns.DIGIT == ns.D + assert ns.UNSIGNED == ns.U + assert ns.FLOAT == ns.F + assert ns.SIGNED == ns.S + assert ns.NOEXP == ns.N + assert ns.PATH == ns.P + assert ns.LOCALE == ns.L + assert ns.IGNORECASE == ns.IC + assert ns.LOWERCASEFIRST == ns.LF + assert ns.GROUPLETTERS == ns.G + assert ns.UNGROUPLETTERS == ns.UG + assert ns.CAPITALFIRST == ns.C + assert ns.UNGROUPLETTERS == ns.CAPITALFIRST + assert ns.NANLAST == ns.NL + + # Convenience + assert ns.REAL == ns.FLOAT | ns.SIGNED + assert ns._NUMERIC_ONLY == ns.REAL | ns.NOEXP def test_chain_functions_is_a_no_op_if_no_functions_are_given(): @@ -609,279 +632,79 @@ def test_parse_string_function_selects_pre_function_value_if_not_dumb(): def test_parse_path_function_parses_string_as_path_then_as_string(): splt = _parse_string_function(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2) assert _parse_path_function(splt)('/p/Folder (10)/file34.5nm (2).tar.gz') == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) + assert _parse_path_function(splt)('../Folder (10)/file (2).tar.gz') == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) + assert _parse_path_function(splt)('Folder (10)/file.f34.5nm (2).tar.gz') == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) -def test__natsort_key_with_nan_input_transforms_nan_to_negative_inf(): - assert _natsort_key('nan', None, ns.FLOAT) == ('', float('-inf')) - assert 
_natsort_key(float('nan'), None, 0) == ('', float('-inf')) - - -def test__natsort_key_with_nan_input_and_NANLAST_transforms_nan_to_positive_inf(): - assert _natsort_key('nan', None, ns.FLOAT | ns.NANLAST) == ('', float('+inf')) - assert _natsort_key(float('nan'), None, ns.NANLAST) == ('', float('+inf')) - assert ns.NL == ns.NANLAST +# Just a few tests to make sure _natsort_key is working +regex = _regex_chooser[ns.INT] +pre = _pre_split_function(ns.INT) +post = _post_split_function(ns.INT) +after = _post_string_parse_function(ns.INT, '') +string_func = _parse_string_function(ns.INT, '', regex.split, pre, post, after) +bytes_func = _parse_bytes_function(ns.INT) +num_func = _parse_number_function(ns.INT, '') -# The remaining tests provide no examples, just hypothesis tests. -# They only confirm that _natsort_key uses the above building blocks. - -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_float_and_signed_splits_input_into_string_and_signed_float_with_exponent(x): - assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert ns.F == ns.FLOAT - assert ns.S == ns.SIGNED - assert _natsort_key(s, None, ns.F | ns.S) == tuple(_number_extracter(s, _float_sign_exp_re, *float_nolocale_nogroup)) - - -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_real_splits_input_into_string_and_signed_float_with_exponent(x): - assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert ns.R == ns.F | ns.S - assert _natsort_key(s, None, ns.R) == tuple(_number_extracter(s, _float_sign_exp_re, *float_nolocale_nogroup)) - - -@pytest.mark.skipif(not 
use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_real_matches_signed_float(x): - assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert _natsort_key(s, None, ns.R) == _natsort_key(s, None, ns.F | ns.S) - - -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_float_and_signed_and_noexp_splits_input_into_string_and_signed_float_without_exponent(x): - assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert ns.N == ns.NOEXP - assert _natsort_key(s, None, ns.F | ns.S | ns.N) == tuple(_number_extracter(s, _float_sign_noexp_re, *float_nolocale_nogroup)) - - -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_float_and_unsigned_splits_input_into_string_and_unsigned_float(x): - assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert ns.U == ns.UNSIGNED - assert _natsort_key(s, None, ns.F | ns.U) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nolocale_nogroup)) - # Default is unsigned search - assert _natsort_key(s, None, ns.F) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nolocale_nogroup)) - - -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_float_and_noexp_splits_input_into_string_and_unsigned_float_without_exponent(x): - assume(not any(type(y) == float and isnan(y) for y in 
x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert _natsort_key(s, None, ns.F | ns.N) == tuple(_number_extracter(s, _float_nosign_noexp_re, *float_nolocale_nogroup)) - - -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_int_splits_input_into_string_and_unsigned_int(x): - assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert ns.I == ns.INT - assert _natsort_key(s, None, ns.INT) == tuple(_number_extracter(s, _int_nosign_re, *int_nolocale_nogroup)) - # Default is int search - assert _natsort_key(s, None, ns.NOEXP) == tuple(_number_extracter(s, _int_nosign_re, *int_nolocale_nogroup)) - # NOEXP is ignored for integers - assert _natsort_key(s, None, ns.I | ns.NOEXP) == tuple(_number_extracter(s, _int_nosign_re, *int_nolocale_nogroup)) - - -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_int_splits_and_signed_input_into_string_and_signed_int(x): - assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert _natsort_key(s, None, ns.INT | ns.SIGNED) == tuple(_number_extracter(s, _int_sign_re, *int_nolocale_nogroup)) - assert _natsort_key(s, None, ns.SIGNED) == tuple(_number_extracter(s, _int_sign_re, *int_nolocale_nogroup)) - - -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_version_or_digit_matches_usigned_int(x): - assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert 
_natsort_key(s, None, ns.VERSION) == _natsort_key(s, None, ns.INT | ns.UNSIGNED) - assert _natsort_key(s, None, ns.DIGIT) == _natsort_key(s, None, ns.VERSION) +def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple(): + # It gracefully handles as_path for numeric input by putting an extra tuple around it + # so it will sort against the other as_path results. + sfunc = _parse_path_function(string_func) + bytes_func = _parse_bytes_function(ns.PATH) + num_func = _parse_number_function(ns.PATH, '') + assert _natsort_key(10, None, sfunc, bytes_func, num_func) == (('', 10),) -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_key_applies_key_function_before_splitting(x): - assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert _natsort_key(s, lambda x: x.upper(), ns.I) == tuple(_number_extracter(s.upper(), _int_nosign_re, *int_nolocale_nogroup)) +def test__natsort_key_with_bytes_input_and_PATH_returns_number_in_nested_tuple(): + # It gracefully handles as_path for numeric input by putting an extra tuple around it + # so it will sort against the other as_path results. + sfunc = _parse_path_function(string_func) + bytes_func = _parse_bytes_function(ns.PATH) + num_func = _parse_number_function(ns.PATH, '') + assert _natsort_key(b'/hello/world', None, sfunc, bytes_func, num_func) == ((b'/hello/world',),) -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_tuple_input_returns_nested_tuples(x): - # Iterables are parsed recursively so you can sort lists of lists. 
- assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - t = tuple(_number_extracter(s, _int_nosign_re, *int_nolocale_nogroup)) - assert _natsort_key((s, s), None, ns.I) == (t, t) +def test__natsort_key_with_tuple_of_paths_and_PATH_returns_triply_nested_tuple(): + # PATH also handles recursion well. + sfunc = _parse_path_function(string_func) + bytes_func = _parse_bytes_function(ns.PATH) + num_func = _parse_number_function(ns.PATH, '') + assert _natsort_key(('/Folder', '/Folder (1)'), None, sfunc, bytes_func, num_func) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1, ')'))) -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_tuple_input_but_itemgetter_key_returns_split_second_element(x): - # A key is applied before recursion, but not in the recursive calls. - assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - t = tuple(_number_extracter(s, _int_nosign_re, *int_nolocale_nogroup)) - assert _natsort_key((s, s), itemgetter(1), ns.I) == t +# The remaining tests provide no examples, just hypothesis tests. +# They only confirm that _natsort_key uses the above building blocks. 
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(floats()) -def test__natsort_key_with_numeric_input_returns_number_with_leading_empty_string(x): +@given(floats() | integers()) +def test__natsort_key_with_numeric_input_takes_number_path(x): assume(not isnan(x)) - if x.is_integer(): - x = int(x) - assert _natsort_key(x, None, ns.I) == ('', x) - - -def test__natsort_key_with_invalid_alg_input_raises_ValueError(): - # Invalid arguments give the correct response - with raises(ValueError) as err: - _natsort_key('a', None, '1') - assert str(err.value) == "_natsort_key: 'alg' argument must be from the enum 'ns', got 1" - - -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_IGNORECASE_lowercases_text(x): - assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - try: - assert _natsort_key(s, None, ns.IGNORECASE) == tuple(_number_extracter(s.casefold(), _int_nosign_re, *int_nolocale_nogroup)) - except AttributeError: - assert _natsort_key(s, None, ns.IGNORECASE) == tuple(_number_extracter(s.lower(), _int_nosign_re, *int_nolocale_nogroup)) - - -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_LOWERCASEFIRST_inverts_text_case(x): - assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert _natsort_key(s, None, ns.LOWERCASEFIRST) == tuple(_number_extracter(s.swapcase(), _int_nosign_re, *int_nolocale_nogroup)) + assert _natsort_key(x, None, string_func, bytes_func, num_func) == num_func(x) @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() 
| integers(), min_size=1, max_size=10)) -def test__natsort_key_with_GROUPLETTERS_doubles_text_with_lowercase_letter_first(x): - assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(ichain([repr(y)] if type(y) in (float, long, int) else [low(y), y] for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - t = _number_extracter(s, _int_nosign_re, *int_nolocale_nogroup) - assert _natsort_key(s, None, ns.GROUPLETTERS) == tuple(''.join(low(z) + z for z in y) if type(y) not in (float, long, int) else y for y in t) - - -@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_GROUPLETTERS_and_LOWERCASEFIRST_inverts_text_first_then_doubles_letters_with_lowercase_letter_first(x): - assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(ichain([repr(y)] if type(y) in (float, long, int) else [low(y), y] for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - t = _number_extracter(s.swapcase(), _int_nosign_re, *int_nolocale_nogroup) - assert _natsort_key(s, None, ns.G | ns.LF) == tuple(''.join(low(z) + z for z in y) if type(y) not in (float, long, int) else y for y in t) - - -def test__natsort_key_with_bytes_input_only_applies_LOWERCASEFIRST_or_IGNORECASE_and_returns_in_tuple(): - if sys.version[0] == '3': - assert _natsort_key(b'Apple56', None, ns.I) == (b'Apple56',) - assert _natsort_key(b'Apple56', None, ns.LF) == (b'Apple56',) - assert _natsort_key(b'Apple56', None, ns.IC) == (b'apple56',) - assert _natsort_key(b'Apple56', None, ns.G) == (b'Apple56',) - else: - assert True +@given(binary()) +def test__natsort_key_with_bytes_input_takes_bytes_path(x): + assert _natsort_key(x, None, string_func, bytes_func, num_func) == bytes_func(x) @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') @given(lists(elements=floats() | 
text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_LOCALE_transforms_floats_according_to_the_current_locale_and_strxfrms_strings(x): - # Locale aware sorting +def test__natsort_key_with_text_input_takes_string_path(x): assume(not any(type(y) == float and isnan(y) for y in x)) - assume(not any(any(i in bad_uni_chars for i in y) for y in x if isinstance(y, py23_str))) s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - load_locale('en_US') - if dumb_sort(): - assert _natsort_key(s, None, ns.LOCALE | ns.F) == tuple(_number_extracter(s.swapcase(), _float_nosign_exp_re, *float_locale_group)) - else: - assert _natsort_key(s, None, ns.LOCALE | ns.F) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_locale_nogroup)) - locale.setlocale(locale.LC_NUMERIC, str('')) + assert _natsort_key(s, None, string_func, bytes_func, num_func) == string_func(s) @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_LOCALE_and_UNGROUPLETTERS_places_space_before_string_with_capital_first_letter(x): - # Locale aware sorting - assume(not any(type(y) == float and isnan(y) for y in x)) - assume(not any(any(i in bad_uni_chars for i in y) for y in x if isinstance(y, py23_str))) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - load_locale('en_US') - if dumb_sort(): - t = tuple(_number_extracter(s.swapcase(), _float_nosign_exp_re, *float_locale_group)) - else: - t = tuple(_number_extracter(s, _float_nosign_exp_re, *float_locale_nogroup)) - if not t: - r = (t, t) - elif t[0] in (null_string, get_strxfrm()(b'\x00') if sys.version[0] == '2' and not use_pyicu else null_string): - r = ((b'' if use_pyicu else '',), t) - else: - r = ((s[0],), t) - assert _natsort_key(s, None, ns.LOCALE | ns.UNGROUPLETTERS | ns.F) == r - # The below are all aliases for UNGROUPLETTERS - assert ns.UNGROUPLETTERS 
== ns.UG - assert ns.UNGROUPLETTERS == ns.CAPITALFIRST - assert ns.UNGROUPLETTERS == ns.C - locale.setlocale(locale.LC_NUMERIC, str('')) +@given(lists(elements=text(), min_size=1, max_size=10)) +def test__natsort_key_with_nested_input_takes_nested_path(x): + assert _natsort_key(x, None, string_func, bytes_func, num_func) == tuple(string_func(s) for s in x) @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') -@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10)) -def test__natsort_key_with_UNGROUPLETTERS_does_nothing_without_LOCALE(x): - assume(not any(type(y) == float and isnan(y) for y in x)) - s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) - assert _natsort_key(s, None, ns.UG | ns.I) == _natsort_key(s, None, ns.I) - - -# It is difficult to generate code that will create random filesystem paths, -# so "example" based tests are given for the PATH option. - - -def test__natsort_key_with_absolute_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_root_and_split_extensions(): - # Turn on PATH to split a file path into components - assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', None, ns.PATH | ns.F) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) - - -def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_relative_parent_and_split_extensions(): - assert _natsort_key('../Folder (10)/file (2).tar.gz', None, ns.PATH | ns.F) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) - - -def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_and_split_extensions(): - assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', None, ns.PATH | ns.F) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) - - 
-def test__natsort_key_with_pathlib_intput_and_PATH_returns_nested_tuples(): - # Converts pathlib PurePath (and subclass) objects to string before sorting - assert _natsort_key(pathlib.Path('../Folder (10)/file (2).tar.gz'), None, ns.PATH | ns.F) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) - - -def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple(): - # It gracefully handles as_path for numeric input by putting an extra tuple around it - # so it will sort against the other as_path results. - assert _natsort_key(10, None, ns.PATH) == (('', 10),) - - -def test__natsort_key_with_tuple_of_paths_and_PATH_returns_triply_nested_tuple(): - # PATH also handles recursion well. - assert _natsort_key(('/Folder', '/Folder (1)'), None, ns.PATH) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1, ')'))) +@given(text()) +def test__natsort_key_with_key_argument_applies_key_before_processing(x): + assert _natsort_key(x, len, string_func, bytes_func, num_func) == num_func(len(x)) |