summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSeth Morton <seth.m.morton@gmail.com>2023-02-27 00:40:15 -0800
committerSeth Morton <seth.m.morton@gmail.com>2023-02-27 00:40:15 -0800
commit99a252e445a75a115431ee8fa18f487375ca9d20 (patch)
tree150a471469b36a9b03b0012303aa9c91cae4aeb6
parente778c1742fc94766b42110580809795605ca3c88 (diff)
parent92969665342450fb450904776a59bd8406dfe65a (diff)
downloadnatsort-99a252e445a75a115431ee8fa18f487375ca9d20.tar.gz
Merge branch 'introduce-consistent-sorting-for-corner-cases'
-rw-r--r--CHANGELOG.md7
-rw-r--r--natsort/natsort.py13
-rw-r--r--natsort/ns_enum.py9
-rw-r--r--natsort/utils.py12
-rw-r--r--tests/test_natsorted.py58
-rw-r--r--tests/test_natsorted_convenience.py7
-rw-r--r--tests/test_ns_enum.py2
-rw-r--r--tests/test_os_sorted.py7
-rw-r--r--tests/test_parse_number_function.py20
9 files changed, 119 insertions, 16 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2fdfe57..904a1b6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,13 @@
Unreleased
---
+### Added
+- The `presort` option to `natsorted` and friends to attain consistent
+ sort order in certain corner cases (Issue
+ [#149](https://github.com/SethMMorton/natsort/issues/149))
+- Logic to ensure `None` and NaN are sorted in a consistent order
+ (Issue [#149](https://github.com/SethMMorton/natsort/issues/149))
+
### Changed
- Only convert to `str` if necessary in `os_sorted`
([@Dobatymo](https://github.com/Dobatymo), issues
diff --git a/natsort/natsort.py b/natsort/natsort.py
index ea83e48..2325443 100644
--- a/natsort/natsort.py
+++ b/natsort/natsort.py
@@ -288,6 +288,8 @@ def natsorted(
['num2', 'num3', 'num5']
"""
+ if alg & ns.PRESORT:
+ seq = sorted(seq, reverse=reverse, key=str)
return sorted(seq, reverse=reverse, key=natsort_keygen(key, alg))
@@ -477,6 +479,8 @@ def index_natsorted(
# Pair the index and sequence together, then sort by element
index_seq_pair = [(x, y) for x, y in enumerate(seq)]
+ if alg & ns.PRESORT:
+ index_seq_pair.sort(reverse=reverse, key=lambda x: str(itemgetter(1)(x)))
index_seq_pair.sort(reverse=reverse, key=natsort_keygen(newkey, alg))
return [x for x, _ in index_seq_pair]
@@ -768,6 +772,7 @@ def os_sorted(
seq: Iterable[T],
key: Optional[Callable[[T], NatsortInType]] = None,
reverse: bool = False,
+ presort: bool = False,
) -> List[T]:
"""
Sort elements in the same order as your operating system's file browser
@@ -810,6 +815,10 @@ def os_sorted(
Return the list in reversed sorted order. The default is
`False`.
+ presort : {{True, False}}, optional
+ Equivalent to adding ``ns.PRESORT``, see :class:`ns` for
+ documentation. The default is `False`.
+
Returns
-------
out : list
@@ -825,4 +834,6 @@ def os_sorted(
This will implicitly coerce all inputs to str before collating.
"""
- return sorted(seq, key=os_sort_keygen(key), reverse=reverse)
+ if presort:
+ seq = sorted(seq, reverse=reverse, key=str)
+ return sorted(seq, reverse=reverse, key=os_sort_keygen(key))
diff --git a/natsort/ns_enum.py b/natsort/ns_enum.py
index c147909..02f970f 100644
--- a/natsort/ns_enum.py
+++ b/natsort/ns_enum.py
@@ -114,6 +114,14 @@ class ns(enum.IntEnum): # noqa: N801
treat these as +Infinity and place them after all the other numbers.
By default, an NaN be treated as -Infinity and be placed first.
Note that this ``None`` is treated like NaN internally.
+ PRESORT, PS
+ Sort the input as strings before sorting with the `nasort`
+ algorithm. This can help eliminate inconsistent sorting in cases
+ where two different strings represent the same number. For example,
+ "a1" and "a01" both are internally represented as ("a", "1), so
+ without `PRESORT` the order of these two values would depend on
+ the order they appeared in the input (because Python's `sorted`
+ is a stable sorting algorithm).
Notes
-----
@@ -143,6 +151,7 @@ class ns(enum.IntEnum): # noqa: N801
NANLAST = NL = 1 << next(_counter)
COMPATIBILITYNORMALIZE = CN = 1 << next(_counter)
NUMAFTER = NA = 1 << next(_counter)
+ PRESORT = PS = 1 << next(_counter)
# Following were previously options but are now defaults.
DEFAULT = 0
diff --git a/natsort/utils.py b/natsort/utils.py
index b86225e..2a3745f 100644
--- a/natsort/utils.py
+++ b/natsort/utils.py
@@ -420,7 +420,17 @@ def parse_number_or_none_factory(
val: Any, _nan_replace: float = nan_replace, _sep: StrOrBytes = sep
) -> BasicTuple:
"""Given a number, place it in a tuple with a leading null string."""
- return _sep, (_nan_replace if val != val or val is None else val)
+ # Add a trailing string numbers equaling _nan_replace. This will make
+ # the ordering between None NaN, and the NaN replacement value...
+ # None comes first, then NaN, then the replacement value.
+ if val is None:
+ return _sep, _nan_replace, "1"
+ elif val != val:
+ return _sep, _nan_replace, "2"
+ elif val == _nan_replace:
+ return _sep, _nan_replace, "3"
+ else:
+ return _sep, val
# Return the function, possibly wrapping in tuple if PATH is selected.
if alg & ns.PATH and alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA:
diff --git a/tests/test_natsorted.py b/tests/test_natsorted.py
index eccb9d2..e4a4788 100644
--- a/tests/test_natsorted.py
+++ b/tests/test_natsorted.py
@@ -4,6 +4,7 @@ Here are a collection of examples of how this module can be used.
See the README or the natsort homepage for more details.
"""
+import math
from operator import itemgetter
from pathlib import PurePosixPath
from typing import List, Tuple, Union
@@ -110,19 +111,29 @@ def test_natsorted_handles_mixed_types(
@pytest.mark.parametrize(
- "alg, expected, slc",
+ "alg, expected",
[
- (ns.DEFAULT, [float("nan"), 5, "25", 1e40], slice(1, None)),
- (ns.NANLAST, [5, "25", 1e40, float("nan")], slice(None, 3)),
+ (ns.DEFAULT, [None, float("nan"), float("-inf"), 5, "25", 1e40, float("inf")]),
+ (ns.NANLAST, [float("-inf"), 5, "25", 1e40, None, float("nan"), float("inf")]),
],
)
-def test_natsorted_handles_nan(
- alg: NSType, expected: List[Union[str, float, int]], slc: slice
+def test_natsorted_consistent_ordering_with_nan_and_friends(
+ alg: NSType, expected: List[Union[str, float, None, int]]
) -> None:
- given: List[Union[str, float, int]] = ["25", 5, float("nan"), 1e40]
- # The slice is because NaN != NaN
- # noinspection PyUnresolvedReferences
- assert natsorted(given, alg=alg)[slc] == expected[slc]
+ sentinel = math.pi
+ expected = [sentinel if x != x else x for x in expected]
+ given: List[Union[str, float, None, int]] = [
+ float("inf"),
+ float("-inf"),
+ "25",
+ 5,
+ float("nan"),
+ 1e40,
+ None,
+ ]
+ result = natsorted(given, alg=alg)
+ result = [sentinel if x != x else x for x in result]
+ assert result == expected
def test_natsorted_with_mixed_bytes_and_str_input_raises_type_error() -> None:
@@ -367,3 +378,32 @@ def test_natsorted_sorts_mixed_ascii_and_non_ascii_numbers() -> None:
"street ۱۲",
]
assert natsorted(given, alg=ns.IGNORECASE) == expected
+
+
+def test_natsort_sorts_consistently_with_presort() -> None:
+ # Demonstrate the problem:
+ # Sorting is order-dependent for values that have different
+ # string representations are equiavlent numerically.
+ given = ["a01", "a1.4500", "a1", "a1.45"]
+ expected = ["a01", "a1", "a1.4500", "a1.45"]
+ result = natsorted(given, alg=ns.FLOAT)
+ assert result == expected
+
+ given = ["a1", "a1.45", "a01", "a1.4500"]
+ expected = ["a1", "a01", "a1.45", "a1.4500"]
+ result = natsorted(given, alg=ns.FLOAT)
+ assert result == expected
+
+ # The solution - use "presort" which will sort the
+ # input by its string representation before sorting
+ # with natsorted, which gives consitent results even
+ # if the numeric representation is identical
+ expected = ["a01", "a1", "a1.45", "a1.4500"]
+
+ given = ["a01", "a1.4500", "a1", "a1.45"]
+ result = natsorted(given, alg=ns.FLOAT | ns.PRESORT)
+ assert result == expected
+
+ given = ["a1", "a1.45", "a01", "a1.4500"]
+ result = natsorted(given, alg=ns.FLOAT | ns.PRESORT)
+ assert result == expected
diff --git a/tests/test_natsorted_convenience.py b/tests/test_natsorted_convenience.py
index 0b2cd75..81bdf5c 100644
--- a/tests/test_natsorted_convenience.py
+++ b/tests/test_natsorted_convenience.py
@@ -88,6 +88,13 @@ def test_index_natsorted_applies_key_function_before_sorting() -> None:
assert index_natsorted(given, key=itemgetter(1)) == expected
+def test_index_natsorted_can_presort() -> None:
+ expected = [2, 0, 3, 1]
+ given = ["a1", "a1.4500", "a01", "a1.45"]
+ result = index_natsorted(given, alg=ns.FLOAT | ns.PRESORT)
+ assert result == expected
+
+
def test_index_realsorted_is_identical_to_index_natsorted_with_real_alg(
float_list: List[str],
) -> None:
diff --git a/tests/test_ns_enum.py b/tests/test_ns_enum.py
index 7a30718..c950812 100644
--- a/tests/test_ns_enum.py
+++ b/tests/test_ns_enum.py
@@ -18,6 +18,7 @@ from natsort import ns
("NANLAST", 0x0400),
("COMPATIBILITYNORMALIZE", 0x0800),
("NUMAFTER", 0x1000),
+ ("PRESORT", 0x2000),
("DEFAULT", 0x0000),
("INT", 0x0000),
("UNSIGNED", 0x0000),
@@ -42,6 +43,7 @@ from natsort import ns
("NL", 0x0400),
("CN", 0x0800),
("NA", 0x1000),
+ ("PS", 0x2000),
],
)
def test_ns_enum(given: str, expected: int) -> None:
diff --git a/tests/test_os_sorted.py b/tests/test_os_sorted.py
index f714437..c29c110 100644
--- a/tests/test_os_sorted.py
+++ b/tests/test_os_sorted.py
@@ -47,6 +47,13 @@ def test_os_sorted_key() -> None:
assert result == expected
+def test_os_sorted_can_presort() -> None:
+ given = ["a1", "a01"]
+ expected = ["a01", "a1"]
+ result = natsort.os_sorted(given, presort=True)
+ assert result == expected
+
+
# The following is a master list of things that might give trouble
# when sorting like the file explorer.
given_characters = [
diff --git a/tests/test_parse_number_function.py b/tests/test_parse_number_function.py
index 85d6b96..5ac5700 100644
--- a/tests/test_parse_number_function.py
+++ b/tests/test_parse_number_function.py
@@ -20,7 +20,7 @@ from natsort.utils import NumTransformer, parse_number_or_none_factory
(ns.PATH | ns.UNGROUPLETTERS | ns.LOCALE, lambda x: ((("xx",), ("", x)),)),
],
)
-@given(x=floats(allow_nan=False) | integers())
+@given(x=floats(allow_nan=False, allow_infinity=False) | integers())
def test_parse_number_factory_makes_function_that_returns_tuple(
x: Union[float, int], alg: NSType, example_func: NumTransformer
) -> None:
@@ -32,10 +32,20 @@ def test_parse_number_factory_makes_function_that_returns_tuple(
"alg, x, result",
[
(ns.DEFAULT, 57, ("", 57)),
- (ns.DEFAULT, float("nan"), ("", float("-inf"))), # NaN transformed to -infinity
- (ns.NANLAST, float("nan"), ("", float("+inf"))), # NANLAST makes it +infinity
- (ns.DEFAULT, None, ("", float("-inf"))), # None transformed to -infinity
- (ns.NANLAST, None, ("", float("+inf"))), # NANLAST makes it +infinity
+ (
+ ns.DEFAULT,
+ float("nan"),
+ ("", float("-inf"), "2"),
+ ), # NaN transformed to -infinity
+ (
+ ns.NANLAST,
+ float("nan"),
+ ("", float("+inf"), "2"),
+ ), # NANLAST makes it +infinity
+ (ns.DEFAULT, None, ("", float("-inf"), "1")), # None transformed to -infinity
+ (ns.NANLAST, None, ("", float("+inf"), "1")), # NANLAST makes it +infinity
+ (ns.DEFAULT, float("-inf"), ("", float("-inf"), "3")),
+ (ns.NANLAST, float("+inf"), ("", float("+inf"), "3")),
],
)
def test_parse_number_factory_treats_nan_and_none_special(