summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSeth Morton <seth.m.morton@gmail.com>2022-01-30 14:12:34 -0800
committerGitHub <noreply@github.com>2022-01-30 14:12:34 -0800
commit24d7a4c73d655a72baae1acd8a988837a6cc8cac (patch)
treeb7b66a2f9cf08d36643a199bca105264004a020b
parent4f0b3a8b005a0a43d2c21cf0b1e11c89186603b3 (diff)
parent4832c1506833ea40592bc79d638e233495eb3558 (diff)
downloadnatsort-24d7a4c73d655a72baae1acd8a988837a6cc8cac.tar.gz
Merge pull request #146 from SethMMorton/over-zealous-extension-splitting
Over zealous extension splitting
-rw-r--r--CHANGELOG.md8
-rw-r--r--natsort/utils.py29
-rw-r--r--tests/test_natsorted.py15
-rw-r--r--tests/test_utils.py25
4 files changed, 59 insertions, 18 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2f81376..b1475fc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,18 +1,22 @@
Unreleased
---
+### Changed
+- When using `ns.PATH`, only split off a maximum of two suffixes from
+ a file name (issues #145, #146).
+
[8.0.2] - 2021-12-14
---
### Fixed
-- Bug where sorting paths fail if one of the paths is '.'.
+- Bug where sorting paths fails if one of the paths is '.' (issues #142, #143)
[8.0.1] - 2021-12-10
---
### Fixed
- Compose unicode characters when using locale to ensure sorting is correct
- across all locales.
+ across all locales (issues #140, #141)
[8.0.0] - 2021-11-03
---
diff --git a/natsort/utils.py b/natsort/utils.py
index 8d56b06..3832318 100644
--- a/natsort/utils.py
+++ b/natsort/utils.py
@@ -893,16 +893,21 @@ def path_splitter(
path_parts = []
base = str(s)
- # Now, split off the file extensions until we reach a decimal number at
- # the beginning of the suffix or there are no more extensions.
- suffixes = PurePath(base).suffixes
- try:
- digit_index = next(i for i, x in enumerate(reversed(suffixes)) if _d_match(x))
- except StopIteration:
- pass
- else:
- digit_index = len(suffixes) - digit_index
- suffixes = suffixes[digit_index:]
-
+ # Now, split off the file extensions until
+ # - we reach a decimal number at the beginning of the suffix
+ # - more than two suffixes have been seen
+ # - a suffix is more than five characters (including leading ".")
+ # - there are no more extensions
+ suffixes = []
+ for i, suffix in enumerate(reversed(PurePath(base).suffixes)):
+ if _d_match(suffix) or i > 1 or len(suffix) > 5:
+ break
+ suffixes.append(suffix)
+ suffixes.reverse()
+
+ # Remove the suffixes from the base component
base = base.replace("".join(suffixes), "")
- return filter(None, ichain(path_parts, [base], suffixes))
+ base_component = [base] if base else []
+
+ # Join all path components in an iterator
+ return filter(None, ichain(path_parts, base_component, suffixes))
diff --git a/tests/test_natsorted.py b/tests/test_natsorted.py
index 4a64a27..eb3aefe 100644
--- a/tests/test_natsorted.py
+++ b/tests/test_natsorted.py
@@ -182,6 +182,21 @@ def test_natsorted_handles_numbers_and_filesystem_paths_simultaneously() -> None
assert natsorted(given, alg=ns.PATH) == expected
+def test_natsorted_path_extensions_heuristic() -> None:
+ # https://github.com/SethMMorton/natsort/issues/145
+ given = [
+ "Try.Me.Bug - 09 - One.Two.Three.[text].mkv",
+ "Try.Me.Bug - 07 - One.Two.5.[text].mkv",
+ "Try.Me.Bug - 08 - One.Two.Three[text].mkv",
+ ]
+ expected = [
+ "Try.Me.Bug - 07 - One.Two.5.[text].mkv",
+ "Try.Me.Bug - 08 - One.Two.Three[text].mkv",
+ "Try.Me.Bug - 09 - One.Two.Three.[text].mkv",
+ ]
+ assert natsorted(given, alg=ns.PATH) == expected
+
+
@pytest.mark.parametrize(
"alg, expected",
[
diff --git a/tests/test_utils.py b/tests/test_utils.py
index bb229b9..b140682 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -6,7 +6,7 @@ import pathlib
import string
from itertools import chain
from operator import neg as op_neg
-from typing import List, Pattern, Union
+from typing import List, Pattern, Tuple, Union
import pytest
from hypothesis import given
@@ -155,9 +155,26 @@ def test_path_splitter_splits_path_string_by_sep(x: List[str]) -> None:
assert tuple(utils.path_splitter(z)) == tuple(pathlib.Path(z).parts)
-def test_path_splitter_splits_path_string_by_sep_and_removes_extension_example() -> None:
- given = "/this/is/a/path/file.x1.10.tar.gz"
- expected = (os.sep, "this", "is", "a", "path", "file.x1.10", ".tar", ".gz")
+@pytest.mark.parametrize(
+ "given, expected",
+ [
+ (
+ "/this/is/a/path/file.x1.10.tar.gz",
+ (os.sep, "this", "is", "a", "path", "file.x1.10", ".tar", ".gz"),
+ ),
+ (
+ "/this/is/a/path/file.x1.10.tar",
+ (os.sep, "this", "is", "a", "path", "file.x1.10", ".tar"),
+ ),
+ (
+ "/this/is/a/path/file.x1.threethousand.tar",
+ (os.sep, "this", "is", "a", "path", "file.x1.threethousand", ".tar"),
+ ),
+ ],
+)
+def test_path_splitter_splits_path_string_by_sep_and_removes_extension_example(
+ given: str, expected: Tuple[str, ...]
+) -> None:
assert tuple(utils.path_splitter(given)) == tuple(expected)