summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSeth Morton <seth.m.morton@gmail.com>2023-02-27 00:22:59 -0800
committerSeth Morton <seth.m.morton@gmail.com>2023-02-27 00:34:55 -0800
commit50389e16d3aba5139890e14d57257320a9bc7e11 (patch)
treeae00d7e693c7534aa2cc61b044ceae8236e54edd
parentcf2a55daf7e74d177c95149da623172b1b6d93ae (diff)
downloadnatsort-50389e16d3aba5139890e14d57257320a9bc7e11.tar.gz
Add presort to natsorted and friends
This will sort the collection as strings before sorting with the natsort algorithm. This ensures that strings that are different but represent the same numerical value get sorted independent of input order.
-rw-r--r--natsort/natsort.py13
-rw-r--r--natsort/ns_enum.py9
-rw-r--r--tests/test_natsorted.py29
-rw-r--r--tests/test_natsorted_convenience.py7
-rw-r--r--tests/test_ns_enum.py2
-rw-r--r--tests/test_os_sorted.py7
6 files changed, 66 insertions, 1 deletions
diff --git a/natsort/natsort.py b/natsort/natsort.py
index ea83e48..2325443 100644
--- a/natsort/natsort.py
+++ b/natsort/natsort.py
@@ -288,6 +288,8 @@ def natsorted(
['num2', 'num3', 'num5']
"""
+ if alg & ns.PRESORT:
+ seq = sorted(seq, reverse=reverse, key=str)
return sorted(seq, reverse=reverse, key=natsort_keygen(key, alg))
@@ -477,6 +479,8 @@ def index_natsorted(
# Pair the index and sequence together, then sort by element
index_seq_pair = [(x, y) for x, y in enumerate(seq)]
+ if alg & ns.PRESORT:
+ index_seq_pair.sort(reverse=reverse, key=lambda x: str(itemgetter(1)(x)))
index_seq_pair.sort(reverse=reverse, key=natsort_keygen(newkey, alg))
return [x for x, _ in index_seq_pair]
@@ -768,6 +772,7 @@ def os_sorted(
seq: Iterable[T],
key: Optional[Callable[[T], NatsortInType]] = None,
reverse: bool = False,
+ presort: bool = False,
) -> List[T]:
"""
Sort elements in the same order as your operating system's file browser
@@ -810,6 +815,10 @@ def os_sorted(
Return the list in reversed sorted order. The default is
`False`.
+ presort : {{True, False}}, optional
+ Equivalent to adding ``ns.PRESORT``, see :class:`ns` for
+ documentation. The default is `False`.
+
Returns
-------
out : list
@@ -825,4 +834,6 @@ def os_sorted(
This will implicitly coerce all inputs to str before collating.
"""
- return sorted(seq, key=os_sort_keygen(key), reverse=reverse)
+ if presort:
+ seq = sorted(seq, reverse=reverse, key=str)
+ return sorted(seq, reverse=reverse, key=os_sort_keygen(key))
diff --git a/natsort/ns_enum.py b/natsort/ns_enum.py
index c147909..02f970f 100644
--- a/natsort/ns_enum.py
+++ b/natsort/ns_enum.py
@@ -114,6 +114,14 @@ class ns(enum.IntEnum): # noqa: N801
treat these as +Infinity and place them after all the other numbers.
By default, an NaN be treated as -Infinity and be placed first.
Note that this ``None`` is treated like NaN internally.
+ PRESORT, PS
+ Sort the input as strings before sorting with the `natsort`
+ algorithm. This can help eliminate inconsistent sorting in cases
+ where two different strings represent the same number. For example,
+ "a1" and "a01" both are internally represented as ("a", "1"), so
+ without `PRESORT` the order of these two values would depend on
+ the order they appeared in the input (because Python's `sorted`
+ is a stable sorting algorithm).
Notes
-----
@@ -143,6 +151,7 @@ class ns(enum.IntEnum): # noqa: N801
NANLAST = NL = 1 << next(_counter)
COMPATIBILITYNORMALIZE = CN = 1 << next(_counter)
NUMAFTER = NA = 1 << next(_counter)
+ PRESORT = PS = 1 << next(_counter)
# Following were previously options but are now defaults.
DEFAULT = 0
diff --git a/tests/test_natsorted.py b/tests/test_natsorted.py
index 3d6375c..e4a4788 100644
--- a/tests/test_natsorted.py
+++ b/tests/test_natsorted.py
@@ -378,3 +378,32 @@ def test_natsorted_sorts_mixed_ascii_and_non_ascii_numbers() -> None:
"street ۱۲",
]
assert natsorted(given, alg=ns.IGNORECASE) == expected
+
+
+def test_natsort_sorts_consistently_with_presort() -> None:
+ # Demonstrate the problem:
+ # Sorting is order-dependent for values that have different
+ # string representations but are equivalent numerically.
+ given = ["a01", "a1.4500", "a1", "a1.45"]
+ expected = ["a01", "a1", "a1.4500", "a1.45"]
+ result = natsorted(given, alg=ns.FLOAT)
+ assert result == expected
+
+ given = ["a1", "a1.45", "a01", "a1.4500"]
+ expected = ["a1", "a01", "a1.45", "a1.4500"]
+ result = natsorted(given, alg=ns.FLOAT)
+ assert result == expected
+
+ # The solution - use "presort" which will sort the
+ # input by its string representation before sorting
+ # with natsorted, which gives consistent results even
+ # if the numeric representation is identical
+ expected = ["a01", "a1", "a1.45", "a1.4500"]
+
+ given = ["a01", "a1.4500", "a1", "a1.45"]
+ result = natsorted(given, alg=ns.FLOAT | ns.PRESORT)
+ assert result == expected
+
+ given = ["a1", "a1.45", "a01", "a1.4500"]
+ result = natsorted(given, alg=ns.FLOAT | ns.PRESORT)
+ assert result == expected
diff --git a/tests/test_natsorted_convenience.py b/tests/test_natsorted_convenience.py
index 0b2cd75..81bdf5c 100644
--- a/tests/test_natsorted_convenience.py
+++ b/tests/test_natsorted_convenience.py
@@ -88,6 +88,13 @@ def test_index_natsorted_applies_key_function_before_sorting() -> None:
assert index_natsorted(given, key=itemgetter(1)) == expected
+def test_index_natsorted_can_presort() -> None:
+ expected = [2, 0, 3, 1]
+ given = ["a1", "a1.4500", "a01", "a1.45"]
+ result = index_natsorted(given, alg=ns.FLOAT | ns.PRESORT)
+ assert result == expected
+
+
def test_index_realsorted_is_identical_to_index_natsorted_with_real_alg(
float_list: List[str],
) -> None:
diff --git a/tests/test_ns_enum.py b/tests/test_ns_enum.py
index 7a30718..c950812 100644
--- a/tests/test_ns_enum.py
+++ b/tests/test_ns_enum.py
@@ -18,6 +18,7 @@ from natsort import ns
("NANLAST", 0x0400),
("COMPATIBILITYNORMALIZE", 0x0800),
("NUMAFTER", 0x1000),
+ ("PRESORT", 0x2000),
("DEFAULT", 0x0000),
("INT", 0x0000),
("UNSIGNED", 0x0000),
@@ -42,6 +43,7 @@ from natsort import ns
("NL", 0x0400),
("CN", 0x0800),
("NA", 0x1000),
+ ("PS", 0x2000),
],
)
def test_ns_enum(given: str, expected: int) -> None:
diff --git a/tests/test_os_sorted.py b/tests/test_os_sorted.py
index f714437..c29c110 100644
--- a/tests/test_os_sorted.py
+++ b/tests/test_os_sorted.py
@@ -47,6 +47,13 @@ def test_os_sorted_key() -> None:
assert result == expected
+def test_os_sorted_can_presort() -> None:
+ given = ["a1", "a01"]
+ expected = ["a01", "a1"]
+ result = natsort.os_sorted(given, presort=True)
+ assert result == expected
+
+
# The following is a master list of things that might give trouble
# when sorting like the file explorer.
given_characters = [