diff options
-rw-r--r-- | natsort/natsort.py | 13 | ||||
-rw-r--r-- | natsort/ns_enum.py | 9 | ||||
-rw-r--r-- | tests/test_natsorted.py | 29 | ||||
-rw-r--r-- | tests/test_natsorted_convenience.py | 7 | ||||
-rw-r--r-- | tests/test_ns_enum.py | 2 | ||||
-rw-r--r-- | tests/test_os_sorted.py | 7 |
6 files changed, 66 insertions, 1 deletions
diff --git a/natsort/natsort.py b/natsort/natsort.py index ea83e48..2325443 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -288,6 +288,8 @@ def natsorted( ['num2', 'num3', 'num5'] """ + if alg & ns.PRESORT: + seq = sorted(seq, reverse=reverse, key=str) return sorted(seq, reverse=reverse, key=natsort_keygen(key, alg)) @@ -477,6 +479,8 @@ def index_natsorted( # Pair the index and sequence together, then sort by element index_seq_pair = [(x, y) for x, y in enumerate(seq)] + if alg & ns.PRESORT: + index_seq_pair.sort(reverse=reverse, key=lambda x: str(itemgetter(1)(x))) index_seq_pair.sort(reverse=reverse, key=natsort_keygen(newkey, alg)) return [x for x, _ in index_seq_pair] @@ -768,6 +772,7 @@ def os_sorted( seq: Iterable[T], key: Optional[Callable[[T], NatsortInType]] = None, reverse: bool = False, + presort: bool = False, ) -> List[T]: """ Sort elements in the same order as your operating system's file browser @@ -810,6 +815,10 @@ def os_sorted( Return the list in reversed sorted order. The default is `False`. + presort : {{True, False}}, optional + Equivalent to adding ``ns.PRESORT``, see :class:`ns` for + documentation. The default is `False`. + Returns ------- out : list @@ -825,4 +834,6 @@ def os_sorted( This will implicitly coerce all inputs to str before collating. """ - return sorted(seq, key=os_sort_keygen(key), reverse=reverse) + if presort: + seq = sorted(seq, reverse=reverse, key=str) + return sorted(seq, reverse=reverse, key=os_sort_keygen(key)) diff --git a/natsort/ns_enum.py b/natsort/ns_enum.py index c147909..02f970f 100644 --- a/natsort/ns_enum.py +++ b/natsort/ns_enum.py @@ -114,6 +114,14 @@ class ns(enum.IntEnum): # noqa: N801 treat these as +Infinity and place them after all the other numbers. By default, an NaN be treated as -Infinity and be placed first. Note that this ``None`` is treated like NaN internally. + PRESORT, PS + Sort the input as strings before sorting with the `natsort` + algorithm. 
This can help eliminate inconsistent sorting in cases + where two different strings represent the same number. For example, + "a1" and "a01" both are internally represented as ("a", 1), so + without `PRESORT` the order of these two values would depend on + the order they appeared in the input (because Python's `sorted` + is a stable sorting algorithm). Notes ----- @@ -143,6 +151,7 @@ class ns(enum.IntEnum): # noqa: N801 NANLAST = NL = 1 << next(_counter) COMPATIBILITYNORMALIZE = CN = 1 << next(_counter) NUMAFTER = NA = 1 << next(_counter) + PRESORT = PS = 1 << next(_counter) # Following were previously options but are now defaults. DEFAULT = 0 diff --git a/tests/test_natsorted.py b/tests/test_natsorted.py index 3d6375c..e4a4788 100644 --- a/tests/test_natsorted.py +++ b/tests/test_natsorted.py @@ -378,3 +378,32 @@ def test_natsorted_sorts_mixed_ascii_and_non_ascii_numbers() -> None: "street ۱۲", ] assert natsorted(given, alg=ns.IGNORECASE) == expected + + +def test_natsort_sorts_consistently_with_presort() -> None: + # Demonstrate the problem: + # Sorting is order-dependent for values that have different + # string representations but are equivalent numerically. 
+ given = ["a01", "a1.4500", "a1", "a1.45"] + expected = ["a01", "a1", "a1.4500", "a1.45"] + result = natsorted(given, alg=ns.FLOAT) + assert result == expected + + given = ["a1", "a1.45", "a01", "a1.4500"] + expected = ["a1", "a01", "a1.45", "a1.4500"] + result = natsorted(given, alg=ns.FLOAT) + assert result == expected + + # The solution - use "presort" which will sort the + # input by its string representation before sorting + # with natsorted, which gives consistent results even + # if the numeric representation is identical + expected = ["a01", "a1", "a1.45", "a1.4500"] + + given = ["a01", "a1.4500", "a1", "a1.45"] + result = natsorted(given, alg=ns.FLOAT | ns.PRESORT) + assert result == expected + + given = ["a1", "a1.45", "a01", "a1.4500"] + result = natsorted(given, alg=ns.FLOAT | ns.PRESORT) + assert result == expected diff --git a/tests/test_natsorted_convenience.py b/tests/test_natsorted_convenience.py index 0b2cd75..81bdf5c 100644 --- a/tests/test_natsorted_convenience.py +++ b/tests/test_natsorted_convenience.py @@ -88,6 +88,13 @@ def test_index_natsorted_applies_key_function_before_sorting() -> None: assert index_natsorted(given, key=itemgetter(1)) == expected +def test_index_natsorted_can_presort() -> None: + expected = [2, 0, 3, 1] + given = ["a1", "a1.4500", "a01", "a1.45"] + result = index_natsorted(given, alg=ns.FLOAT | ns.PRESORT) + assert result == expected + + def test_index_realsorted_is_identical_to_index_natsorted_with_real_alg( float_list: List[str], ) -> None: diff --git a/tests/test_ns_enum.py b/tests/test_ns_enum.py index 7a30718..c950812 100644 --- a/tests/test_ns_enum.py +++ b/tests/test_ns_enum.py @@ -18,6 +18,7 @@ from natsort import ns ("NANLAST", 0x0400), ("COMPATIBILITYNORMALIZE", 0x0800), ("NUMAFTER", 0x1000), + ("PRESORT", 0x2000), ("DEFAULT", 0x0000), ("INT", 0x0000), ("UNSIGNED", 0x0000), @@ -42,6 +43,7 @@ from natsort import ns ("NL", 0x0400), ("CN", 0x0800), ("NA", 0x1000), + ("PS", 0x2000), ], ) def test_ns_enum(given: 
str, expected: int) -> None: diff --git a/tests/test_os_sorted.py b/tests/test_os_sorted.py index f714437..c29c110 100644 --- a/tests/test_os_sorted.py +++ b/tests/test_os_sorted.py @@ -47,6 +47,13 @@ def test_os_sorted_key() -> None: assert result == expected +def test_os_sorted_can_presort() -> None: + given = ["a1", "a01"] + expected = ["a01", "a1"] + result = natsort.os_sorted(given, presort=True) + assert result == expected + + # The following is a master list of things that might give trouble # when sorting like the file explorer. given_characters = [ |