diff options
-rw-r--r-- | CHANGELOG.md | 7 | ||||
-rw-r--r-- | natsort/natsort.py | 13 | ||||
-rw-r--r-- | natsort/ns_enum.py | 9 | ||||
-rw-r--r-- | natsort/utils.py | 12 | ||||
-rw-r--r-- | tests/test_natsorted.py | 58 | ||||
-rw-r--r-- | tests/test_natsorted_convenience.py | 7 | ||||
-rw-r--r-- | tests/test_ns_enum.py | 2 | ||||
-rw-r--r-- | tests/test_os_sorted.py | 7 | ||||
-rw-r--r-- | tests/test_parse_number_function.py | 20 |
9 files changed, 119 insertions, 16 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 2fdfe57..904a1b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,13 @@ Unreleased --- +### Added +- The `presort` option to `natsorted` and friends to attain consistent + sort order in certain corner cases (Issue + [#149](https://github.com/SethMMorton/natsort/issues/149)) +- Logic to ensure `None` and NaN are sorted in a consistent order + (Issue [#149](https://github.com/SethMMorton/natsort/issues/149)) + ### Changed - Only convert to `str` if necessary in `os_sorted` ([@Dobatymo](https://github.com/Dobatymo), issues diff --git a/natsort/natsort.py b/natsort/natsort.py index ea83e48..2325443 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -288,6 +288,8 @@ def natsorted( ['num2', 'num3', 'num5'] """ + if alg & ns.PRESORT: + seq = sorted(seq, reverse=reverse, key=str) return sorted(seq, reverse=reverse, key=natsort_keygen(key, alg)) @@ -477,6 +479,8 @@ def index_natsorted( # Pair the index and sequence together, then sort by element index_seq_pair = [(x, y) for x, y in enumerate(seq)] + if alg & ns.PRESORT: + index_seq_pair.sort(reverse=reverse, key=lambda x: str(itemgetter(1)(x))) index_seq_pair.sort(reverse=reverse, key=natsort_keygen(newkey, alg)) return [x for x, _ in index_seq_pair] @@ -768,6 +772,7 @@ def os_sorted( seq: Iterable[T], key: Optional[Callable[[T], NatsortInType]] = None, reverse: bool = False, + presort: bool = False, ) -> List[T]: """ Sort elements in the same order as your operating system's file browser @@ -810,6 +815,10 @@ def os_sorted( Return the list in reversed sorted order. The default is `False`. + presort : {{True, False}}, optional + Equivalent to adding ``ns.PRESORT``, see :class:`ns` for + documentation. The default is `False`. + Returns ------- out : list @@ -825,4 +834,6 @@ def os_sorted( This will implicitly coerce all inputs to str before collating. """ - return sorted(seq, key=os_sort_keygen(key), reverse=reverse) + if presort: + seq = sorted(seq, reverse=reverse, key=str) + return sorted(seq, reverse=reverse, key=os_sort_keygen(key)) diff --git a/natsort/ns_enum.py b/natsort/ns_enum.py index c147909..02f970f 100644 --- a/natsort/ns_enum.py +++ b/natsort/ns_enum.py @@ -114,6 +114,14 @@ class ns(enum.IntEnum): # noqa: N801 treat these as +Infinity and place them after all the other numbers. By default, an NaN be treated as -Infinity and be placed first. Note that this ``None`` is treated like NaN internally. + PRESORT, PS + Sort the input as strings before sorting with the `nasort` + algorithm. This can help eliminate inconsistent sorting in cases + where two different strings represent the same number. For example, + "a1" and "a01" both are internally represented as ("a", "1), so + without `PRESORT` the order of these two values would depend on + the order they appeared in the input (because Python's `sorted` + is a stable sorting algorithm). Notes ----- @@ -143,6 +151,7 @@ class ns(enum.IntEnum): # noqa: N801 NANLAST = NL = 1 << next(_counter) COMPATIBILITYNORMALIZE = CN = 1 << next(_counter) NUMAFTER = NA = 1 << next(_counter) + PRESORT = PS = 1 << next(_counter) # Following were previously options but are now defaults. DEFAULT = 0 diff --git a/natsort/utils.py b/natsort/utils.py index b86225e..2a3745f 100644 --- a/natsort/utils.py +++ b/natsort/utils.py @@ -420,7 +420,17 @@ def parse_number_or_none_factory( val: Any, _nan_replace: float = nan_replace, _sep: StrOrBytes = sep ) -> BasicTuple: """Given a number, place it in a tuple with a leading null string.""" - return _sep, (_nan_replace if val != val or val is None else val) + # Add a trailing string numbers equaling _nan_replace. This will make + # the ordering between None NaN, and the NaN replacement value... + # None comes first, then NaN, then the replacement value. + if val is None: + return _sep, _nan_replace, "1" + elif val != val: + return _sep, _nan_replace, "2" + elif val == _nan_replace: + return _sep, _nan_replace, "3" + else: + return _sep, val # Return the function, possibly wrapping in tuple if PATH is selected. if alg & ns.PATH and alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA: diff --git a/tests/test_natsorted.py b/tests/test_natsorted.py index eccb9d2..e4a4788 100644 --- a/tests/test_natsorted.py +++ b/tests/test_natsorted.py @@ -4,6 +4,7 @@ Here are a collection of examples of how this module can be used. See the README or the natsort homepage for more details. """ +import math from operator import itemgetter from pathlib import PurePosixPath from typing import List, Tuple, Union @@ -110,19 +111,29 @@ def test_natsorted_handles_mixed_types( @pytest.mark.parametrize( - "alg, expected, slc", + "alg, expected", [ - (ns.DEFAULT, [float("nan"), 5, "25", 1e40], slice(1, None)), - (ns.NANLAST, [5, "25", 1e40, float("nan")], slice(None, 3)), + (ns.DEFAULT, [None, float("nan"), float("-inf"), 5, "25", 1e40, float("inf")]), + (ns.NANLAST, [float("-inf"), 5, "25", 1e40, None, float("nan"), float("inf")]), ], ) -def test_natsorted_handles_nan( - alg: NSType, expected: List[Union[str, float, int]], slc: slice +def test_natsorted_consistent_ordering_with_nan_and_friends( + alg: NSType, expected: List[Union[str, float, None, int]] ) -> None: - given: List[Union[str, float, int]] = ["25", 5, float("nan"), 1e40] - # The slice is because NaN != NaN - # noinspection PyUnresolvedReferences - assert natsorted(given, alg=alg)[slc] == expected[slc] + sentinel = math.pi + expected = [sentinel if x != x else x for x in expected] + given: List[Union[str, float, None, int]] = [ + float("inf"), + float("-inf"), + "25", + 5, + float("nan"), + 1e40, + None, + ] + result = natsorted(given, alg=alg) + result = [sentinel if x != x else x for x in result] + assert result == expected def test_natsorted_with_mixed_bytes_and_str_input_raises_type_error() -> None: @@ -367,3 +378,32 @@ def test_natsorted_sorts_mixed_ascii_and_non_ascii_numbers() -> None: "street ۱۲", ] assert natsorted(given, alg=ns.IGNORECASE) == expected + + +def test_natsort_sorts_consistently_with_presort() -> None: + # Demonstrate the problem: + # Sorting is order-dependent for values that have different + # string representations are equiavlent numerically. + given = ["a01", "a1.4500", "a1", "a1.45"] + expected = ["a01", "a1", "a1.4500", "a1.45"] + result = natsorted(given, alg=ns.FLOAT) + assert result == expected + + given = ["a1", "a1.45", "a01", "a1.4500"] + expected = ["a1", "a01", "a1.45", "a1.4500"] + result = natsorted(given, alg=ns.FLOAT) + assert result == expected + + # The solution - use "presort" which will sort the + # input by its string representation before sorting + # with natsorted, which gives consitent results even + # if the numeric representation is identical + expected = ["a01", "a1", "a1.45", "a1.4500"] + + given = ["a01", "a1.4500", "a1", "a1.45"] + result = natsorted(given, alg=ns.FLOAT | ns.PRESORT) + assert result == expected + + given = ["a1", "a1.45", "a01", "a1.4500"] + result = natsorted(given, alg=ns.FLOAT | ns.PRESORT) + assert result == expected diff --git a/tests/test_natsorted_convenience.py b/tests/test_natsorted_convenience.py index 0b2cd75..81bdf5c 100644 --- a/tests/test_natsorted_convenience.py +++ b/tests/test_natsorted_convenience.py @@ -88,6 +88,13 @@ def test_index_natsorted_applies_key_function_before_sorting() -> None: assert index_natsorted(given, key=itemgetter(1)) == expected +def test_index_natsorted_can_presort() -> None: + expected = [2, 0, 3, 1] + given = ["a1", "a1.4500", "a01", "a1.45"] + result = index_natsorted(given, alg=ns.FLOAT | ns.PRESORT) + assert result == expected + + def test_index_realsorted_is_identical_to_index_natsorted_with_real_alg( float_list: List[str], ) -> None: diff --git a/tests/test_ns_enum.py b/tests/test_ns_enum.py index 7a30718..c950812 100644 --- a/tests/test_ns_enum.py +++ b/tests/test_ns_enum.py @@ -18,6 +18,7 @@ from natsort import ns ("NANLAST", 0x0400), ("COMPATIBILITYNORMALIZE", 0x0800), ("NUMAFTER", 0x1000), + ("PRESORT", 0x2000), ("DEFAULT", 0x0000), ("INT", 0x0000), ("UNSIGNED", 0x0000), @@ -42,6 +43,7 @@ from natsort import ns ("NL", 0x0400), ("CN", 0x0800), ("NA", 0x1000), + ("PS", 0x2000), ], ) def test_ns_enum(given: str, expected: int) -> None: diff --git a/tests/test_os_sorted.py b/tests/test_os_sorted.py index f714437..c29c110 100644 --- a/tests/test_os_sorted.py +++ b/tests/test_os_sorted.py @@ -47,6 +47,13 @@ def test_os_sorted_key() -> None: assert result == expected +def test_os_sorted_can_presort() -> None: + given = ["a1", "a01"] + expected = ["a01", "a1"] + result = natsort.os_sorted(given, presort=True) + assert result == expected + + # The following is a master list of things that might give trouble # when sorting like the file explorer. given_characters = [ diff --git a/tests/test_parse_number_function.py b/tests/test_parse_number_function.py index 85d6b96..5ac5700 100644 --- a/tests/test_parse_number_function.py +++ b/tests/test_parse_number_function.py @@ -20,7 +20,7 @@ from natsort.utils import NumTransformer, parse_number_or_none_factory (ns.PATH | ns.UNGROUPLETTERS | ns.LOCALE, lambda x: ((("xx",), ("", x)),)), ], ) -@given(x=floats(allow_nan=False) | integers()) +@given(x=floats(allow_nan=False, allow_infinity=False) | integers()) def test_parse_number_factory_makes_function_that_returns_tuple( x: Union[float, int], alg: NSType, example_func: NumTransformer ) -> None: @@ -32,10 +32,20 @@ def test_parse_number_factory_makes_function_that_returns_tuple( "alg, x, result", [ (ns.DEFAULT, 57, ("", 57)), - (ns.DEFAULT, float("nan"), ("", float("-inf"))), # NaN transformed to -infinity - (ns.NANLAST, float("nan"), ("", float("+inf"))), # NANLAST makes it +infinity - (ns.DEFAULT, None, ("", float("-inf"))), # None transformed to -infinity - (ns.NANLAST, None, ("", float("+inf"))), # NANLAST makes it +infinity + ( + ns.DEFAULT, + float("nan"), + ("", float("-inf"), "2"), + ), # NaN transformed to -infinity + ( + ns.NANLAST, + float("nan"), + ("", float("+inf"), "2"), + ), # NANLAST makes it +infinity + (ns.DEFAULT, None, ("", float("-inf"), "1")), # None transformed to -infinity + (ns.NANLAST, None, ("", float("+inf"), "1")), # NANLAST makes it +infinity + (ns.DEFAULT, float("-inf"), ("", float("-inf"), "3")), + (ns.NANLAST, float("+inf"), ("", float("+inf"), "3")), ], ) def test_parse_number_factory_treats_nan_and_none_special( |