7 files changed, 59 insertions, 8 deletions
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 8dfb799..d78fdf9 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -36,7 +36,7 @@ jobs:
         if: matrix.os == 'ubuntu-latest'
         run: |
           sudo apt-get update
-          sudo apt-get install language-pack-de language-pack-en
+          sudo apt-get install language-pack-de language-pack-en language-pack-cs
 
       - name: Install ICU
         if: matrix.extras
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 263c5f4..df15326 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,11 @@
 Unreleased
 ---
 
+### Fixed
+
+- Compose unicode characters when using locale to ensure sorting is correct
+  across all locales.
+
 [8.0.0] - 2021-11-03
 ---
 
diff --git a/natsort/compat/locale.py b/natsort/compat/locale.py
index 9af5e7a..b4c5356 100644
--- a/natsort/compat/locale.py
+++ b/natsort/compat/locale.py
@@ -54,7 +54,6 @@ try:  # noqa: C901
         sep = icu.DecimalFormatSymbols.kDecimalSeparatorSymbol
         return cast(str, icu.DecimalFormatSymbols(get_icu_locale()).getSymbol(sep))
 
-
 except ImportError:
     import locale
     from locale import strxfrm
diff --git a/natsort/natsort.py b/natsort/natsort.py
index a95f9a9..9f34bc1 100644
--- a/natsort/natsort.py
+++ b/natsort/natsort.py
@@ -786,7 +786,6 @@ if platform.system() == "Windows":
             OSSortKeyType, lambda x: tuple(map(_winsort_key, _split_apply(x, key)))
         )
 
-
 else:
 
     # For UNIX-based platforms, ICU performs MUCH better than locale
diff --git a/natsort/utils.py b/natsort/utils.py
index 7102f41..c9448b4 100644
--- a/natsort/utils.py
+++ b/natsort/utils.py
@@ -235,6 +235,25 @@ def _normalize_input_factory(alg: NSType) -> StrToStr:
     return partial(normalize, normalization_form)
 
 
+def _compose_input_factory(alg: NSType) -> StrToStr:
+    """
+    Create a function that will compose unicode input data.
+
+    Parameters
+    ----------
+    alg : ns enum
+        Used to indicate how to compose unicode.
+
+    Returns
+    -------
+    func : callable
+        A function that accepts string (unicode) input and returns the
+        input normalized with the desired composition scheme.
+    """
+    normalization_form = "NFKC" if alg & ns.COMPATIBILITYNORMALIZE else "NFC"
+    return partial(normalize, normalization_form)
+
+
 @overload
 def natsort_key(
     val: NatsortInType,
@@ -472,6 +491,7 @@ def parse_string_factory(
     orig_after_xfrm = not (alg & NS_DUMB and alg & ns.LOCALEALPHA)
     original_func = input_transform if orig_after_xfrm else _no_op
     normalize_input = _normalize_input_factory(alg)
+    compose_input = _compose_input_factory(alg) if alg & ns.LOCALEALPHA else _no_op
 
     def func(x: str) -> FinalTransform:
         # Apply string input transformation function and return to x.
@@ -479,11 +499,12 @@ def parse_string_factory(
         # to also be the transformation function.
         a = normalize_input(x)
         b, original = input_transform(a), original_func(a)
-        c = splitter(b)  # Split string into components.
-        d = filter(None, c)  # Remove empty strings.
-        e = map(component_transform, d)  # Apply transform on components.
-        f = sep_inserter(e, sep)  # Insert '' between numbers.
-        return final_transform(f, original)  # Apply the final transform.
+        c = compose_input(b)  # Compose unicode if using LOCALE
+        d = splitter(c)  # Split string into components.
+        e = filter(None, d)  # Remove empty strings.
+        f = map(component_transform, e)  # Apply transform on components.
+        g = sep_inserter(f, sep)  # Insert '' between numbers.
+        return final_transform(g, original)  # Apply the final transform.
 
     return func
 
diff --git a/tests/conftest.py b/tests/conftest.py
index c63e149..cda2aaf 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -7,6 +7,7 @@ from typing import Iterator
 
 import hypothesis
 import pytest
+from natsort.compat.locale import dumb_sort
 
 
 # This disables the "too slow" hypothesis heath check globally.
@@ -48,3 +49,21 @@ def with_locale_de_de() -> Iterator[None]:
         yield
     finally:
         locale.setlocale(locale.LC_ALL, orig)
+
+
+@pytest.fixture()
+def with_locale_cs_cz() -> Iterator[None]:
+    """
+    Convenience to load the cs_CZ locale - reset when complete - skip if missing.
+    """
+    orig = locale.getlocale()
+    try:
+        load_locale("cs_CZ")
+        if dumb_sort():
+            pytest.skip("requires a functioning locale library to run")
+    except locale.Error:
+        pytest.skip("requires cs_CZ locale to be installed")
+    else:
+        yield
+    finally:
+        locale.setlocale(locale.LC_ALL, orig)
diff --git a/tests/test_natsorted.py b/tests/test_natsorted.py
index d043ab4..4a64a27 100644
--- a/tests/test_natsorted.py
+++ b/tests/test_natsorted.py
@@ -251,6 +251,14 @@ def test_natsorted_locale_bug_regression_test_109() -> None:
     assert natsorted(given, alg=ns.LOCALE) == expected
 
 
+@pytest.mark.usefixtures("with_locale_cs_cz")
+def test_natsorted_locale_bug_regression_test_140() -> None:
+    # https://github.com/SethMMorton/natsort/issues/140
+    given = ["Aš", "Cheb", "Česko", "Cibulov", "Znojmo", "Žilina"]
+    expected = ["Aš", "Cibulov", "Česko", "Cheb", "Znojmo", "Žilina"]
+    assert natsorted(given, alg=ns.LOCALE) == expected
+
+
 @pytest.mark.parametrize(
     "alg, expected",
     [