From 50389e16d3aba5139890e14d57257320a9bc7e11 Mon Sep 17 00:00:00 2001 From: Seth Morton Date: Mon, 27 Feb 2023 00:22:59 -0800 Subject: Add presort to natsorted and friends This will sort the collection as strings before sorting with the natsort algorithm. This ensures that strings that are different but represent the same numerical value get sorted independent of input order. --- natsort/natsort.py | 13 ++++++++++++- natsort/ns_enum.py | 9 +++++++++ tests/test_natsorted.py | 29 +++++++++++++++++++++++++++++ tests/test_natsorted_convenience.py | 7 +++++++ tests/test_ns_enum.py | 2 ++ tests/test_os_sorted.py | 7 +++++++ 6 files changed, 66 insertions(+), 1 deletion(-) diff --git a/natsort/natsort.py b/natsort/natsort.py index ea83e48..2325443 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -288,6 +288,8 @@ def natsorted( ['num2', 'num3', 'num5'] """ + if alg & ns.PRESORT: + seq = sorted(seq, reverse=reverse, key=str) return sorted(seq, reverse=reverse, key=natsort_keygen(key, alg)) @@ -477,6 +479,8 @@ def index_natsorted( # Pair the index and sequence together, then sort by element index_seq_pair = [(x, y) for x, y in enumerate(seq)] + if alg & ns.PRESORT: + index_seq_pair.sort(reverse=reverse, key=lambda x: str(itemgetter(1)(x))) index_seq_pair.sort(reverse=reverse, key=natsort_keygen(newkey, alg)) return [x for x, _ in index_seq_pair] @@ -768,6 +772,7 @@ def os_sorted( seq: Iterable[T], key: Optional[Callable[[T], NatsortInType]] = None, reverse: bool = False, + presort: bool = False, ) -> List[T]: """ Sort elements in the same order as your operating system's file browser @@ -810,6 +815,10 @@ def os_sorted( Return the list in reversed sorted order. The default is `False`. + presort : {{True, False}}, optional + Equivalent to adding ``ns.PRESORT``, see :class:`ns` for + documentation. The default is `False`. + Returns ------- out : list @@ -825,4 +834,6 @@ def os_sorted( This will implicitly coerce all inputs to str before collating. 
""" - return sorted(seq, key=os_sort_keygen(key), reverse=reverse) + if presort: + seq = sorted(seq, reverse=reverse, key=str) + return sorted(seq, reverse=reverse, key=os_sort_keygen(key)) diff --git a/natsort/ns_enum.py b/natsort/ns_enum.py index c147909..02f970f 100644 --- a/natsort/ns_enum.py +++ b/natsort/ns_enum.py @@ -114,6 +114,14 @@ class ns(enum.IntEnum): # noqa: N801 treat these as +Infinity and place them after all the other numbers. By default, an NaN be treated as -Infinity and be placed first. Note that this ``None`` is treated like NaN internally. + PRESORT, PS + Sort the input as strings before sorting with the `natsort` + algorithm. This can help eliminate inconsistent sorting in cases + where two different strings represent the same number. For example, + "a1" and "a01" both are internally represented as ("a", 1), so + without `PRESORT` the order of these two values would depend on + the order they appeared in the input (because Python's `sorted` + is a stable sorting algorithm). Notes ----- @@ -143,6 +151,7 @@ class ns(enum.IntEnum): # noqa: N801 NANLAST = NL = 1 << next(_counter) COMPATIBILITYNORMALIZE = CN = 1 << next(_counter) NUMAFTER = NA = 1 << next(_counter) + PRESORT = PS = 1 << next(_counter) # Following were previously options but are now defaults. DEFAULT = 0 diff --git a/tests/test_natsorted.py b/tests/test_natsorted.py index 3d6375c..e4a4788 100644 --- a/tests/test_natsorted.py +++ b/tests/test_natsorted.py @@ -378,3 +378,32 @@ def test_natsorted_sorts_mixed_ascii_and_non_ascii_numbers() -> None: "street ۱۲", ] assert natsorted(given, alg=ns.IGNORECASE) == expected + + +def test_natsort_sorts_consistently_with_presort() -> None: + # Demonstrate the problem: + # Sorting is order-dependent for values that have different + # string representations but are equivalent numerically. 
+ given = ["a01", "a1.4500", "a1", "a1.45"] + expected = ["a01", "a1", "a1.4500", "a1.45"] + result = natsorted(given, alg=ns.FLOAT) + assert result == expected + + given = ["a1", "a1.45", "a01", "a1.4500"] + expected = ["a1", "a01", "a1.45", "a1.4500"] + result = natsorted(given, alg=ns.FLOAT) + assert result == expected + + # The solution - use "presort" which will sort the + # input by its string representation before sorting + # with natsorted, which gives consistent results even + # if the numeric representation is identical + expected = ["a01", "a1", "a1.45", "a1.4500"] + + given = ["a01", "a1.4500", "a1", "a1.45"] + result = natsorted(given, alg=ns.FLOAT | ns.PRESORT) + assert result == expected + + given = ["a1", "a1.45", "a01", "a1.4500"] + result = natsorted(given, alg=ns.FLOAT | ns.PRESORT) + assert result == expected diff --git a/tests/test_natsorted_convenience.py b/tests/test_natsorted_convenience.py index 0b2cd75..81bdf5c 100644 --- a/tests/test_natsorted_convenience.py +++ b/tests/test_natsorted_convenience.py @@ -88,6 +88,13 @@ def test_index_natsorted_applies_key_function_before_sorting() -> None: assert index_natsorted(given, key=itemgetter(1)) == expected +def test_index_natsorted_can_presort() -> None: + expected = [2, 0, 3, 1] + given = ["a1", "a1.4500", "a01", "a1.45"] + result = index_natsorted(given, alg=ns.FLOAT | ns.PRESORT) + assert result == expected + + def test_index_realsorted_is_identical_to_index_natsorted_with_real_alg( float_list: List[str], ) -> None: diff --git a/tests/test_ns_enum.py b/tests/test_ns_enum.py index 7a30718..c950812 100644 --- a/tests/test_ns_enum.py +++ b/tests/test_ns_enum.py @@ -18,6 +18,7 @@ from natsort import ns ("NANLAST", 0x0400), ("COMPATIBILITYNORMALIZE", 0x0800), ("NUMAFTER", 0x1000), + ("PRESORT", 0x2000), ("DEFAULT", 0x0000), ("INT", 0x0000), ("UNSIGNED", 0x0000), @@ -42,6 +43,7 @@ from natsort import ns ("NL", 0x0400), ("CN", 0x0800), ("NA", 0x1000), + ("PS", 0x2000), ], ) def test_ns_enum(given: 
str, expected: int) -> None: diff --git a/tests/test_os_sorted.py b/tests/test_os_sorted.py index f714437..c29c110 100644 --- a/tests/test_os_sorted.py +++ b/tests/test_os_sorted.py @@ -47,6 +47,13 @@ def test_os_sorted_key() -> None: assert result == expected +def test_os_sorted_can_presort() -> None: + given = ["a1", "a01"] + expected = ["a01", "a1"] + result = natsort.os_sorted(given, presort=True) + assert result == expected + + # The following is a master list of things that might give trouble # when sorting like the file explorer. given_characters = [ -- cgit v1.2.1