summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Seth M Morton <seth.m.morton@gmail.com> 2016-05-02 22:37:09 -0700
committer: Seth M Morton <seth.m.morton@gmail.com> 2016-05-02 23:20:59 -0700
commit: 81d09ffc518a39f9ae524202671d8077679c897d (patch)
tree: 77b6ed7f5b14c795e9931a99d95e0694913fb7bd
parent: ca7d4fc9aa840ca47b46dfed5e31eaa261245226 (diff)
download: natsort-81d09ffc518a39f9ae524202671d8077679c897d.tar.gz
Redefined _natsort_key in a functional style.
This required changing the inputs of _natsort_key to be the functions created by the function factories that were recently committed. The change had a cascading effect: several unit tests had to be rewritten, as did the top-level natsort_keygen function, which now has the job of creating the appropriate functions on the fly and passing them to _natsort_key. A future commit will add more testing for natsort_keygen. For the moment, handling of locale-specific numbers is disabled; it will be added back in a future commit.
-rw-r--r-- natsort/__main__.py 13
-rw-r--r-- natsort/natsort.py 58
-rw-r--r-- natsort/utils.py 142
-rw-r--r-- test_natsort/test_natsort.py 32
-rw-r--r-- test_natsort/test_utils.py 357
5 files changed, 194 insertions, 408 deletions
diff --git a/natsort/__main__.py b/natsort/__main__.py
index 03c4551..5361ea2 100644
--- a/natsort/__main__.py
+++ b/natsort/__main__.py
@@ -11,7 +11,7 @@ import sys
# Local imports.
from natsort.natsort import natsorted, ns
-from natsort.utils import _regex_and_num_function_chooser
+from natsort.utils import _regex_chooser
from natsort._version import __version__
from natsort.compat.py23 import py23_str
@@ -171,27 +171,26 @@ def sort_and_print_entries(entries, args):
if do_filter or args.exclude:
inp_options = (ns.FLOAT * is_float |
ns.SIGNED * signed |
- ns.NOEXP * (not args.exp),
- '.'
+ ns.NOEXP * (not args.exp)
)
- regex, num_function = _regex_and_num_function_chooser[inp_options]
+ regex = _regex_chooser[inp_options]
if args.filter is not None:
lows, highs = ([f[0] for f in args.filter],
[f[1] for f in args.filter])
entries = [entry for entry in entries
if keep_entry_range(entry, lows, highs,
- num_function, regex)]
+ float, regex)]
if args.reverse_filter is not None:
lows, highs = ([f[0] for f in args.reverse_filter],
[f[1] for f in args.reverse_filter])
entries = [entry for entry in entries
if not keep_entry_range(entry, lows, highs,
- num_function, regex)]
+ float, regex)]
if args.exclude:
exclude = set(args.exclude)
entries = [entry for entry in entries
if exclude_entry(entry, exclude,
- num_function, regex)]
+ float, regex)]
# Print off the sorted results
for entry in natsorted(entries, reverse=args.reverse, alg=alg):
diff --git a/natsort/natsort.py b/natsort/natsort.py
index 419b876..df2f32a 100644
--- a/natsort/natsort.py
+++ b/natsort/natsort.py
@@ -18,18 +18,32 @@ from __future__ import (
)
# Std lib. imports.
-import re
from operator import itemgetter
from functools import partial
from warnings import warn
# Local imports.
from natsort.ns_enum import ns
-from natsort.compat.py23 import u_format
+from natsort.compat.py23 import (
+ u_format,
+ py23_str,
+)
+from natsort.compat.locale import (
+ null_string,
+ dumb_sort,
+)
from natsort.utils import (
_natsort_key,
_args_to_enum,
_do_decoding,
+ _regex_chooser,
+ _parse_string_function,
+ _parse_path_function,
+ _parse_number_function,
+ _parse_bytes_function,
+ _pre_split_function,
+ _post_split_function,
+ _post_string_parse_function,
)
# Make sure the doctest works for either python2 or python3
@@ -132,7 +146,7 @@ def natsort_key(val, key=None, alg=0, **_kwargs):
"""Undocumented, kept for backwards-compatibility."""
msg = "natsort_key is deprecated as of 3.4.0, please use natsort_keygen"
warn(msg, DeprecationWarning)
- return _natsort_key(val, key, _args_to_enum(**_kwargs) | alg)
+ return natsort_keygen(key, alg, **_kwargs)(val)
@u_format
@@ -183,7 +197,43 @@ def natsort_keygen(key=None, alg=0, **_kwargs):
[{u}'num-3', {u}'num2', {u}'num5.10', {u}'num5.3']
"""
- return partial(_natsort_key, key=key, alg=_args_to_enum(**_kwargs) | alg)
+ # Transform old arguments to the ns enum.
+ try:
+ alg = _args_to_enum(**_kwargs) | alg
+ except TypeError:
+ msg = "natsort_keygen: 'alg' argument must be from the enum 'ns'"
+ raise ValueError(msg+', got {0}'.format(py23_str(alg)))
+
+ # Add the _DUMB option if the locale library is broken.
+ if alg & ns.LOCALE and dumb_sort():
+ alg |= ns._DUMB
+
+ # Set some variable that will be passed to the factory functions
+ sep = null_string if alg & ns.LOCALE else ''
+ regex = _regex_chooser[alg & ns._NUMERIC_ONLY]
+
+ # Create the functions that will be used to split strings.
+ pre = _pre_split_function(alg)
+ post = _post_split_function(alg)
+ after = _post_string_parse_function(alg, sep)
+
+ # Create the high-level parsing functions for strings, bytes, and numbers.
+ string_func = _parse_string_function(
+ alg, sep, regex.split, pre, post, after
+ )
+ if alg & ns.PATH:
+ string_func = _parse_path_function(string_func)
+ bytes_func = _parse_bytes_function(alg)
+ num_func = _parse_number_function(alg, sep)
+
+ # Return the natsort key with the parsing path pre-chosen.
+ return partial(
+ _natsort_key,
+ key=key,
+ string_func=string_func,
+ bytes_func=bytes_func,
+ num_func=num_func
+ )
@u_format
diff --git a/natsort/utils.py b/natsort/utils.py
index aa16db0..d47c747 100644
--- a/natsort/utils.py
+++ b/natsort/utils.py
@@ -17,7 +17,6 @@ from warnings import warn
from os import curdir as os_curdir, pardir as os_pardir
from os.path import split as path_split, splitext as path_splitext
from itertools import chain as ichain
-from locale import localeconv
from collections import deque
from functools import partial
from operator import methodcaller
@@ -33,11 +32,7 @@ from natsort.compat.py23 import (
py23_filter,
PY_VERSION,
)
-from natsort.compat.locale import (
- dumb_sort,
- use_pyicu,
- null_string,
-)
+from natsort.compat.locale import use_pyicu
from natsort.compat.fastnumbers import (
fast_float,
fast_int,
@@ -48,7 +43,6 @@ if sys.version[0] == '3':
# The regex that locates floats - include Unicode numerals.
_exp = r'(?:[eE][-+]?[0-9]+)?'
_num = r'(?:[0-9]+\.?[0-9]*|\.[0-9]+)'
-_num_c = r'(?:[0-9]+[.,]?[0-9]*|[.,][0-9]+)'
_float_sign_exp_re = r'([-+]?{0}{1}|[{2}])'
_float_sign_exp_re = _float_sign_exp_re.format(_num, _exp, numeric)
_float_sign_exp_re = re.compile(_float_sign_exp_re, flags=re.U)
@@ -61,18 +55,6 @@ _float_sign_noexp_re = re.compile(_float_sign_noexp_re, flags=re.U)
_float_nosign_noexp_re = r'({0}|[{1}])'
_float_nosign_noexp_re = _float_nosign_noexp_re.format(_num, numeric)
_float_nosign_noexp_re = re.compile(_float_nosign_noexp_re, flags=re.U)
-_float_sign_exp_re_c = r'([-+]?{0}{1}|[{2}])'
-_float_sign_exp_re_c = _float_sign_exp_re_c.format(_num_c, _exp, numeric)
-_float_sign_exp_re_c = re.compile(_float_sign_exp_re_c, flags=re.U)
-_float_nosign_exp_re_c = r'({0}{1}|[{2}])'
-_float_nosign_exp_re_c = _float_nosign_exp_re_c.format(_num_c, _exp, numeric)
-_float_nosign_exp_re_c = re.compile(_float_nosign_exp_re_c, flags=re.U)
-_float_sign_noexp_re_c = r'([-+]?{0}|[{1}])'
-_float_sign_noexp_re_c = _float_sign_noexp_re_c.format(_num_c, numeric)
-_float_sign_noexp_re_c = re.compile(_float_sign_noexp_re_c, flags=re.U)
-_float_nosign_noexp_re_c = r'({0}|[{1}])'
-_float_nosign_noexp_re_c = _float_nosign_noexp_re_c.format(_num_c, numeric)
-_float_nosign_noexp_re_c = re.compile(_float_nosign_noexp_re_c, flags=re.U)
# Integer regexes - include Unicode digits.
_int_nosign_re = r'([0-9]+|[{0}])'.format(digits)
@@ -81,27 +63,19 @@ _int_sign_re = r'([-+]?[0-9]+|[{0}])'.format(digits)
_int_sign_re = re.compile(_int_sign_re, flags=re.U)
# This dict will help select the correct regex and number conversion function.
-_regex_and_num_function_chooser = {
- (ns.F | ns.S, '.'): (_float_sign_exp_re, fast_float),
- (ns.F | ns.S | ns.N, '.'): (_float_sign_noexp_re, fast_float),
- (ns.F | ns.U, '.'): (_float_nosign_exp_re, fast_float),
- (ns.F | ns.U | ns.N, '.'): (_float_nosign_noexp_re, fast_float),
- (ns.I | ns.S, '.'): (_int_sign_re, fast_int),
- (ns.I | ns.S | ns.N, '.'): (_int_sign_re, fast_int),
- (ns.I | ns.U, '.'): (_int_nosign_re, fast_int),
- (ns.I | ns.U | ns.N, '.'): (_int_nosign_re, fast_int),
- (ns.F | ns.S, ','): (_float_sign_exp_re_c, fast_float),
- (ns.F | ns.S | ns.N, ','): (_float_sign_noexp_re_c, fast_float),
- (ns.F | ns.U, ','): (_float_nosign_exp_re_c, fast_float),
- (ns.F | ns.U | ns.N, ','): (_float_nosign_noexp_re_c, fast_float),
- (ns.I | ns.S, ','): (_int_sign_re, fast_int),
- (ns.I | ns.S | ns.N, ','): (_int_sign_re, fast_int),
- (ns.I | ns.U, ','): (_int_nosign_re, fast_int),
- (ns.I | ns.U | ns.N, ','): (_int_nosign_re, fast_int),
+_regex_chooser = {
+ (ns.F | ns.S): _float_sign_exp_re,
+ (ns.F | ns.S | ns.N): _float_sign_noexp_re,
+ (ns.F | ns.U): _float_nosign_exp_re,
+ (ns.F | ns.U | ns.N): _float_nosign_noexp_re,
+ (ns.I | ns.S): _int_sign_re,
+ (ns.I | ns.S | ns.N): _int_sign_re,
+ (ns.I | ns.U): _int_nosign_re,
+ (ns.I | ns.U | ns.N): _int_nosign_re,
}
-def _natsort_key(val, key, alg):
+def _natsort_key(val, key, string_func, bytes_func, num_func):
"""\
Key to sort strings and numbers naturally.
@@ -122,84 +96,26 @@ def _natsort_key(val, key, alg):
"""
- # Convert the arguments to the proper input tuple
- try:
- use_locale = alg & ns.LOCALE
- inp_options = (alg & ns._NUMERIC_ONLY,
- localeconv()['decimal_point'] if use_locale else '.')
- except TypeError:
- msg = "_natsort_key: 'alg' argument must be from the enum 'ns'"
- raise ValueError(msg+', got {0}'.format(py23_str(alg)))
-
- # Get the proper regex and conversion function.
- try:
- regex, num_function = _regex_and_num_function_chooser[inp_options]
- except KeyError: # pragma: no cover
- if inp_options[1] not in ('.', ','):
- raise ValueError("_natsort_key: currently natsort only supports "
- "the decimal separators '.' and ','. "
- "Please file a bug report.")
- else:
- raise
- else:
- # Apply key if needed.
- if key is not None:
- val = key(val)
+ # Apply key if needed
+ if key is not None:
+ val = key(val)
- # Assume the input are strings, which is the most common case.
+ # Assume the input are strings, which is the most common case
+ try:
+ return string_func(val)
+ except (TypeError, AttributeError):
+ # If bytes type, use the bytes_func
+ if type(val) in (bytes,):
+ return bytes_func(val)
+ # Otherwise, assume it is an iterable that must be parsed recursively.
+ # Do not apply the key recursively.
try:
- if use_locale and dumb_sort():
- alg |= ns._DUMB
- split = _parse_string_function(
- alg,
- null_string if use_locale else '',
- regex.split,
- _pre_split_function(alg),
- _post_split_function(alg),
- _post_string_parse_function(alg, null_string)
- )
- if alg & ns.PATH:
- split = _parse_path_function(split)
- return split(val)
- except (TypeError, AttributeError):
- # Check if it is a bytes type, and if so return as a
- # one element tuple.
- if type(val) in (bytes,):
- return _parse_bytes_function(alg)(val)
- # If not strings, assume it is an iterable that must
- # be parsed recursively. Do not apply the key recursively.
- try:
- return tuple([_natsort_key(x, None, alg) for x in val])
- # If there is still an error, it must be a number.
- # Return as-is, with a leading empty string.
- except TypeError:
- sep = null_string if alg & ns.LOCALE else ''
- f = _parse_number_function(alg, sep)
- return f(val)
-
-
-def _number_extracter(s, regex, numconv, use_locale, group_letters):
- """Helper to separate the string input into numbers and strings."""
-
- # Split the input string by numbers, dropping empty strings.
- # If the input is not a string, TypeError is raised.
- s = py23_filter(None, regex.split(s))
-
- # Now convert the numbers to numbers, and leave strings as strings.
- # Take into account locale if needed, and group letters if needed.
- # Remove empty strings from the list. Insert empty strings between
- # adjascent numbers, or at the beginning of the iterable if it is
- # a number.
- if use_locale and group_letters:
- func = partial(numconv, key=lambda x: locale_convert(groupletters(x)))
- elif use_locale:
- func = partial(numconv, key=locale_convert)
- elif group_letters:
- func = partial(numconv, key=groupletters)
- else:
- func = numconv
- return list(_sep_inserter(py23_map(func, s),
- null_string if use_locale else ''))
+ return tuple(_natsort_key(
+ x, None, string_func, bytes_func, num_func
+ ) for x in val)
+ # If that failed, it must be a number.
+ except TypeError:
+ return num_func(val)
def _parse_bytes_function(alg):
diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py
index eac8c39..8b4bf80 100644
--- a/test_natsort/test_natsort.py
+++ b/test_natsort/test_natsort.py
@@ -57,7 +57,7 @@ def test_natsort_key_public_raises_DeprecationWarning_when_called():
# But it raises a deprecation warning
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
- assert natsort_key('a-5.034e2') == _natsort_key('a-5.034e2', key=None, alg=ns.I)
+ assert natsort_key('a-5.034e2') == ('a-', 5, '.', 34, 'e', 2)
assert len(w) == 1
assert "natsort_key is deprecated as of 3.4.0, please use natsort_keygen" in str(w[-1].message)
# It is called for each element in a list when sorting
@@ -68,19 +68,17 @@ def test_natsort_key_public_raises_DeprecationWarning_when_called():
assert len(w) == 7
-def test_natsort_keygen_returns_natsort_key_with_alg_option():
- a = 'a-5.034e1'
- assert natsort_keygen()(a) == _natsort_key(a, None, ns.I)
- assert natsort_keygen(alg=ns.F | ns.S)(a) == _natsort_key(a, None, ns.F | ns.S)
+def test_natsort_keygen_with_invalid_alg_input_raises_ValueError():
+ # Invalid arguments give the correct response
+ with raises(ValueError) as err:
+ natsort_keygen(None, '1')
+ assert str(err.value) == "natsort_keygen: 'alg' argument must be from the enum 'ns', got 1"
-def test_natsort_keygen_with_key_returns_same_result_as_nested_lambda_with_bare_natsort_key():
+def test_natsort_keygen_returns_natsort_key_that_parses_input():
a = 'a-5.034e1'
- f1 = natsort_keygen(key=lambda x: x.upper())
-
- def f2(x):
- return _natsort_key(x, lambda y: y.upper(), ns.I)
- assert f1(a) == f2(a)
+ assert natsort_keygen()(a) == ('a-', 5, '.', 34, 'e', 1)
+ assert natsort_keygen(alg=ns.F | ns.S)(a) == ('a', -50.34)
def test_natsort_keygen_returns_key_that_can_be_used_to_sort_list_in_place_with_same_result_as_natsorted():
@@ -300,12 +298,12 @@ def test_natsorted_with_LOCALE_and_en_setting_returns_results_sorted_by_en_langu
locale.setlocale(locale.LC_ALL, str(''))
-@pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale')
-def test_natsorted_with_LOCALE_and_de_setting_returns_results_sorted_by_de_language():
- load_locale('de_DE')
- a = ['c', 'ä', 'b', 'a5,6', 'a5,50']
- assert natsorted(a, alg=ns.LOCALE | ns.F) == ['a5,50', 'a5,6', 'ä', 'b', 'c']
- locale.setlocale(locale.LC_ALL, str(''))
+# @pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale')
+# def test_natsorted_with_LOCALE_and_de_setting_returns_results_sorted_by_de_language():
+# load_locale('de_DE')
+# a = ['c', 'ä', 'b', 'a5,6', 'a5,50']
+# assert natsorted(a, alg=ns.LOCALE | ns.F) == ['a5,50', 'a5,6', 'ä', 'b', 'c']
+# locale.setlocale(locale.LC_ALL, str(''))
def test_natsorted_with_LOCALE_and_mixed_input_returns_sorted_results_without_error():
diff --git a/test_natsort/test_utils.py b/test_natsort/test_utils.py
index d56f0b4..737b3a7 100644
--- a/test_natsort/test_utils.py
+++ b/test_natsort/test_utils.py
@@ -3,20 +3,18 @@
from __future__ import unicode_literals
import sys
-import locale
import pathlib
import pytest
import string
from math import isnan, isinf
-from operator import itemgetter, neg as op_neg
-from itertools import chain
+from operator import neg as op_neg
from pytest import raises
from natsort.ns_enum import ns
from natsort.utils import (
- _number_extracter,
_sep_inserter,
_natsort_key,
_args_to_enum,
+ _regex_chooser,
_float_sign_exp_re,
_float_nosign_exp_re,
_float_sign_noexp_re,
@@ -42,7 +40,6 @@ from natsort.compat.py23 import py23_str
from natsort.compat.locale import (
use_pyicu,
null_string,
- dumb_sort,
)
from natsort.compat.fastnumbers import (
fast_float,
@@ -54,12 +51,6 @@ from slow_splitters import (
sep_inserter,
add_leading_space_if_first_is_num,
)
-from compat.locale import (
- load_locale,
- get_strxfrm,
- low,
- bad_uni_chars,
-)
from compat.hypothesis import (
assume,
given,
@@ -77,8 +68,6 @@ from compat.locale import bad_uni_chars
if sys.version[0] == '3':
long = int
-ichain = chain.from_iterable
-
def test_do_decoding_decodes_bytes_string_to_unicode():
assert type(_do_decoding(b'bytes', 'ascii')) is py23_str
@@ -147,14 +136,48 @@ def test_args_to_enum_converts_None_to_ns_IU():
assert _args_to_enum(**{'number_type': None,
'exp': True}) == ns.I | ns.U
-float_locale_group = (fast_float, True, True)
-float_locale_nogroup = (fast_float, True, False)
-float_nolocale_group = (fast_float, False, True)
-float_nolocale_nogroup = (fast_float, False, False)
-int_locale_group = (fast_int, True, True)
-int_locale_nogroup = (fast_int, True, False)
-int_nolocale_group = (fast_int, False, True)
-int_nolocale_nogroup = (fast_int, False, False)
+
+def test_regex_chooser_returns_correct_regular_expression_object():
+ assert _regex_chooser[ns.INT] is _int_nosign_re
+ assert _regex_chooser[ns.INT | ns.NOEXP] is _int_nosign_re
+ assert _regex_chooser[ns.INT | ns.SIGNED] is _int_sign_re
+ assert _regex_chooser[ns.INT | ns.SIGNED | ns.NOEXP] is _int_sign_re
+ assert _regex_chooser[ns.FLOAT] is _float_nosign_exp_re
+ assert _regex_chooser[ns.FLOAT | ns.NOEXP] is _float_nosign_noexp_re
+ assert _regex_chooser[ns.FLOAT | ns.SIGNED] is _float_sign_exp_re
+ assert _regex_chooser[ns.FLOAT | ns.SIGNED | ns.NOEXP] is _float_sign_noexp_re
+
+
+def test_ns_enum_values_have_are_as_expected():
+ # Defaults
+ assert ns.TYPESAFE == 0
+ assert ns.INT == 0
+ assert ns.VERSION == 0
+ assert ns.DIGIT == 0
+ assert ns.UNSIGNED == 0
+
+ # Aliases
+ assert ns.TYPESAFE == ns.T
+ assert ns.INT == ns.I
+ assert ns.VERSION == ns.V
+ assert ns.DIGIT == ns.D
+ assert ns.UNSIGNED == ns.U
+ assert ns.FLOAT == ns.F
+ assert ns.SIGNED == ns.S
+ assert ns.NOEXP == ns.N
+ assert ns.PATH == ns.P
+ assert ns.LOCALE == ns.L
+ assert ns.IGNORECASE == ns.IC
+ assert ns.LOWERCASEFIRST == ns.LF
+ assert ns.GROUPLETTERS == ns.G
+ assert ns.UNGROUPLETTERS == ns.UG
+ assert ns.CAPITALFIRST == ns.C
+ assert ns.UNGROUPLETTERS == ns.CAPITALFIRST
+ assert ns.NANLAST == ns.NL
+
+ # Convenience
+ assert ns.REAL == ns.FLOAT | ns.SIGNED
+ assert ns._NUMERIC_ONLY == ns.REAL | ns.NOEXP
def test_chain_functions_is_a_no_op_if_no_functions_are_given():
@@ -609,279 +632,79 @@ def test_parse_string_function_selects_pre_function_value_if_not_dumb():
def test_parse_path_function_parses_string_as_path_then_as_string():
splt = _parse_string_function(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)
assert _parse_path_function(splt)('/p/Folder (10)/file34.5nm (2).tar.gz') == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',))
+ assert _parse_path_function(splt)('../Folder (10)/file (2).tar.gz') == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',))
+ assert _parse_path_function(splt)('Folder (10)/file.f34.5nm (2).tar.gz') == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',))
-def test__natsort_key_with_nan_input_transforms_nan_to_negative_inf():
- assert _natsort_key('nan', None, ns.FLOAT) == ('', float('-inf'))
- assert _natsort_key(float('nan'), None, 0) == ('', float('-inf'))
-
-
-def test__natsort_key_with_nan_input_and_NANLAST_transforms_nan_to_positive_inf():
- assert _natsort_key('nan', None, ns.FLOAT | ns.NANLAST) == ('', float('+inf'))
- assert _natsort_key(float('nan'), None, ns.NANLAST) == ('', float('+inf'))
- assert ns.NL == ns.NANLAST
+# Just a few tests to make sure _natsort_key is working
+regex = _regex_chooser[ns.INT]
+pre = _pre_split_function(ns.INT)
+post = _post_split_function(ns.INT)
+after = _post_string_parse_function(ns.INT, '')
+string_func = _parse_string_function(ns.INT, '', regex.split, pre, post, after)
+bytes_func = _parse_bytes_function(ns.INT)
+num_func = _parse_number_function(ns.INT, '')
-# The remaining tests provide no examples, just hypothesis tests.
-# They only confirm that _natsort_key uses the above building blocks.
-
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_float_and_signed_splits_input_into_string_and_signed_float_with_exponent(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert ns.F == ns.FLOAT
- assert ns.S == ns.SIGNED
- assert _natsort_key(s, None, ns.F | ns.S) == tuple(_number_extracter(s, _float_sign_exp_re, *float_nolocale_nogroup))
-
-
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_real_splits_input_into_string_and_signed_float_with_exponent(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert ns.R == ns.F | ns.S
- assert _natsort_key(s, None, ns.R) == tuple(_number_extracter(s, _float_sign_exp_re, *float_nolocale_nogroup))
-
-
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_real_matches_signed_float(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert _natsort_key(s, None, ns.R) == _natsort_key(s, None, ns.F | ns.S)
-
-
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_float_and_signed_and_noexp_splits_input_into_string_and_signed_float_without_exponent(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert ns.N == ns.NOEXP
- assert _natsort_key(s, None, ns.F | ns.S | ns.N) == tuple(_number_extracter(s, _float_sign_noexp_re, *float_nolocale_nogroup))
-
-
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_float_and_unsigned_splits_input_into_string_and_unsigned_float(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert ns.U == ns.UNSIGNED
- assert _natsort_key(s, None, ns.F | ns.U) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nolocale_nogroup))
- # Default is unsigned search
- assert _natsort_key(s, None, ns.F) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nolocale_nogroup))
-
-
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_float_and_noexp_splits_input_into_string_and_unsigned_float_without_exponent(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert _natsort_key(s, None, ns.F | ns.N) == tuple(_number_extracter(s, _float_nosign_noexp_re, *float_nolocale_nogroup))
-
-
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_int_splits_input_into_string_and_unsigned_int(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert ns.I == ns.INT
- assert _natsort_key(s, None, ns.INT) == tuple(_number_extracter(s, _int_nosign_re, *int_nolocale_nogroup))
- # Default is int search
- assert _natsort_key(s, None, ns.NOEXP) == tuple(_number_extracter(s, _int_nosign_re, *int_nolocale_nogroup))
- # NOEXP is ignored for integers
- assert _natsort_key(s, None, ns.I | ns.NOEXP) == tuple(_number_extracter(s, _int_nosign_re, *int_nolocale_nogroup))
-
-
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_int_splits_and_signed_input_into_string_and_signed_int(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert _natsort_key(s, None, ns.INT | ns.SIGNED) == tuple(_number_extracter(s, _int_sign_re, *int_nolocale_nogroup))
- assert _natsort_key(s, None, ns.SIGNED) == tuple(_number_extracter(s, _int_sign_re, *int_nolocale_nogroup))
-
-
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_version_or_digit_matches_usigned_int(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert _natsort_key(s, None, ns.VERSION) == _natsort_key(s, None, ns.INT | ns.UNSIGNED)
- assert _natsort_key(s, None, ns.DIGIT) == _natsort_key(s, None, ns.VERSION)
+def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple():
+ # It gracefully handles as_path for numeric input by putting an extra tuple around it
+ # so it will sort against the other as_path results.
+ sfunc = _parse_path_function(string_func)
+ bytes_func = _parse_bytes_function(ns.PATH)
+ num_func = _parse_number_function(ns.PATH, '')
+ assert _natsort_key(10, None, sfunc, bytes_func, num_func) == (('', 10),)
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_key_applies_key_function_before_splitting(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert _natsort_key(s, lambda x: x.upper(), ns.I) == tuple(_number_extracter(s.upper(), _int_nosign_re, *int_nolocale_nogroup))
+def test__natsort_key_with_bytes_input_and_PATH_returns_number_in_nested_tuple():
+ # It gracefully handles as_path for numeric input by putting an extra tuple around it
+ # so it will sort against the other as_path results.
+ sfunc = _parse_path_function(string_func)
+ bytes_func = _parse_bytes_function(ns.PATH)
+ num_func = _parse_number_function(ns.PATH, '')
+ assert _natsort_key(b'/hello/world', None, sfunc, bytes_func, num_func) == ((b'/hello/world',),)
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_tuple_input_returns_nested_tuples(x):
- # Iterables are parsed recursively so you can sort lists of lists.
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- t = tuple(_number_extracter(s, _int_nosign_re, *int_nolocale_nogroup))
- assert _natsort_key((s, s), None, ns.I) == (t, t)
+def test__natsort_key_with_tuple_of_paths_and_PATH_returns_triply_nested_tuple():
+ # PATH also handles recursion well.
+ sfunc = _parse_path_function(string_func)
+ bytes_func = _parse_bytes_function(ns.PATH)
+ num_func = _parse_number_function(ns.PATH, '')
+ assert _natsort_key(('/Folder', '/Folder (1)'), None, sfunc, bytes_func, num_func) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1, ')')))
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_tuple_input_but_itemgetter_key_returns_split_second_element(x):
- # A key is applied before recursion, but not in the recursive calls.
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- t = tuple(_number_extracter(s, _int_nosign_re, *int_nolocale_nogroup))
- assert _natsort_key((s, s), itemgetter(1), ns.I) == t
+# The remaining tests provide no examples, just hypothesis tests.
+# They only confirm that _natsort_key uses the above building blocks.
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(floats())
-def test__natsort_key_with_numeric_input_returns_number_with_leading_empty_string(x):
+@given(floats() | integers())
+def test__natsort_key_with_numeric_input_takes_number_path(x):
assume(not isnan(x))
- if x.is_integer():
- x = int(x)
- assert _natsort_key(x, None, ns.I) == ('', x)
-
-
-def test__natsort_key_with_invalid_alg_input_raises_ValueError():
- # Invalid arguments give the correct response
- with raises(ValueError) as err:
- _natsort_key('a', None, '1')
- assert str(err.value) == "_natsort_key: 'alg' argument must be from the enum 'ns', got 1"
-
-
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_IGNORECASE_lowercases_text(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- try:
- assert _natsort_key(s, None, ns.IGNORECASE) == tuple(_number_extracter(s.casefold(), _int_nosign_re, *int_nolocale_nogroup))
- except AttributeError:
- assert _natsort_key(s, None, ns.IGNORECASE) == tuple(_number_extracter(s.lower(), _int_nosign_re, *int_nolocale_nogroup))
-
-
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_LOWERCASEFIRST_inverts_text_case(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert _natsort_key(s, None, ns.LOWERCASEFIRST) == tuple(_number_extracter(s.swapcase(), _int_nosign_re, *int_nolocale_nogroup))
+ assert _natsort_key(x, None, string_func, bytes_func, num_func) == num_func(x)
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_GROUPLETTERS_doubles_text_with_lowercase_letter_first(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(ichain([repr(y)] if type(y) in (float, long, int) else [low(y), y] for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- t = _number_extracter(s, _int_nosign_re, *int_nolocale_nogroup)
- assert _natsort_key(s, None, ns.GROUPLETTERS) == tuple(''.join(low(z) + z for z in y) if type(y) not in (float, long, int) else y for y in t)
-
-
-@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_GROUPLETTERS_and_LOWERCASEFIRST_inverts_text_first_then_doubles_letters_with_lowercase_letter_first(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(ichain([repr(y)] if type(y) in (float, long, int) else [low(y), y] for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- t = _number_extracter(s.swapcase(), _int_nosign_re, *int_nolocale_nogroup)
- assert _natsort_key(s, None, ns.G | ns.LF) == tuple(''.join(low(z) + z for z in y) if type(y) not in (float, long, int) else y for y in t)
-
-
-def test__natsort_key_with_bytes_input_only_applies_LOWERCASEFIRST_or_IGNORECASE_and_returns_in_tuple():
- if sys.version[0] == '3':
- assert _natsort_key(b'Apple56', None, ns.I) == (b'Apple56',)
- assert _natsort_key(b'Apple56', None, ns.LF) == (b'Apple56',)
- assert _natsort_key(b'Apple56', None, ns.IC) == (b'apple56',)
- assert _natsort_key(b'Apple56', None, ns.G) == (b'Apple56',)
- else:
- assert True
+@given(binary())
+def test__natsort_key_with_bytes_input_takes_bytes_path(x):
+ assert _natsort_key(x, None, string_func, bytes_func, num_func) == bytes_func(x)
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_LOCALE_transforms_floats_according_to_the_current_locale_and_strxfrms_strings(x):
- # Locale aware sorting
+def test__natsort_key_with_text_input_takes_string_path(x):
assume(not any(type(y) == float and isnan(y) for y in x))
- assume(not any(any(i in bad_uni_chars for i in y) for y in x if isinstance(y, py23_str)))
s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- load_locale('en_US')
- if dumb_sort():
- assert _natsort_key(s, None, ns.LOCALE | ns.F) == tuple(_number_extracter(s.swapcase(), _float_nosign_exp_re, *float_locale_group))
- else:
- assert _natsort_key(s, None, ns.LOCALE | ns.F) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_locale_nogroup))
- locale.setlocale(locale.LC_NUMERIC, str(''))
+ assert _natsort_key(s, None, string_func, bytes_func, num_func) == string_func(s)
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_LOCALE_and_UNGROUPLETTERS_places_space_before_string_with_capital_first_letter(x):
- # Locale aware sorting
- assume(not any(type(y) == float and isnan(y) for y in x))
- assume(not any(any(i in bad_uni_chars for i in y) for y in x if isinstance(y, py23_str)))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- load_locale('en_US')
- if dumb_sort():
- t = tuple(_number_extracter(s.swapcase(), _float_nosign_exp_re, *float_locale_group))
- else:
- t = tuple(_number_extracter(s, _float_nosign_exp_re, *float_locale_nogroup))
- if not t:
- r = (t, t)
- elif t[0] in (null_string, get_strxfrm()(b'\x00') if sys.version[0] == '2' and not use_pyicu else null_string):
- r = ((b'' if use_pyicu else '',), t)
- else:
- r = ((s[0],), t)
- assert _natsort_key(s, None, ns.LOCALE | ns.UNGROUPLETTERS | ns.F) == r
- # The below are all aliases for UNGROUPLETTERS
- assert ns.UNGROUPLETTERS == ns.UG
- assert ns.UNGROUPLETTERS == ns.CAPITALFIRST
- assert ns.UNGROUPLETTERS == ns.C
- locale.setlocale(locale.LC_NUMERIC, str(''))
+@given(lists(elements=text(), min_size=1, max_size=10))
+def test__natsort_key_with_nested_input_takes_nested_path(x):
+ assert _natsort_key(x, None, string_func, bytes_func, num_func) == tuple(string_func(s) for s in x)
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
-@given(lists(elements=floats() | text() | integers(), min_size=1, max_size=10))
-def test__natsort_key_with_UNGROUPLETTERS_does_nothing_without_LOCALE(x):
- assume(not any(type(y) == float and isnan(y) for y in x))
- s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
- assert _natsort_key(s, None, ns.UG | ns.I) == _natsort_key(s, None, ns.I)
-
-
-# It is difficult to generate code that will create random filesystem paths,
-# so "example" based tests are given for the PATH option.
-
-
-def test__natsort_key_with_absolute_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_root_and_split_extensions():
- # Turn on PATH to split a file path into components
- assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', None, ns.PATH | ns.F) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',))
-
-
-def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_relative_parent_and_split_extensions():
- assert _natsort_key('../Folder (10)/file (2).tar.gz', None, ns.PATH | ns.F) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',))
-
-
-def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_and_split_extensions():
- assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', None, ns.PATH | ns.F) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',))
-
-
-def test__natsort_key_with_pathlib_intput_and_PATH_returns_nested_tuples():
- # Converts pathlib PurePath (and subclass) objects to string before sorting
- assert _natsort_key(pathlib.Path('../Folder (10)/file (2).tar.gz'), None, ns.PATH | ns.F) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',))
-
-
-def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple():
- # It gracefully handles as_path for numeric input by putting an extra tuple around it
- # so it will sort against the other as_path results.
- assert _natsort_key(10, None, ns.PATH) == (('', 10),)
-
-
-def test__natsort_key_with_tuple_of_paths_and_PATH_returns_triply_nested_tuple():
- # PATH also handles recursion well.
- assert _natsort_key(('/Folder', '/Folder (1)'), None, ns.PATH) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1, ')')))
+@given(text())
+def test__natsort_key_with_key_argument_applies_key_before_processing(x):
+ assert _natsort_key(x, len, string_func, bytes_func, num_func) == num_func(len(x))