From fb10929384240fb64c361fb0b58318f8b08f4a45 Mon Sep 17 00:00:00 2001 From: Seth Morton Date: Tue, 18 Apr 2023 21:33:55 -0700 Subject: Reduce the finger pointing at BSD Turns out a fair amount of the problems were with natsort itself. --- CHANGELOG.md | 2 +- natsort/compat/locale.py | 4 ++-- tests/test_string_component_transform_factory.py | 13 +++++-------- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a79119f..f4811d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ Unreleased --- ### Fixed -- Broken test on FreeBSD due to a broken `locale.strxfrm`. +- Broken test found on FreeBSD. **This change has no effect outside fixing tests** (issue [#161](https://github.com/SethMMorton/natsort/issues/161)) diff --git a/natsort/compat/locale.py b/natsort/compat/locale.py index d802194..b1b7c00 100644 --- a/natsort/compat/locale.py +++ b/natsort/compat/locale.py @@ -20,8 +20,8 @@ null_string_max = chr(sys.maxunicode) * 20 null_string_locale: StrOrBytes null_string_locale_max: StrOrBytes -# strxfrm can be buggy (especially on BSD-based systems), -# so prefer icu if available. +# strxfrm can be buggy (especially on OSX and *possibly* some other +# BSD-based systems), so prefer icu if available. try: # noqa: C901 import icu from locale import getlocale diff --git a/tests/test_string_component_transform_factory.py b/tests/test_string_component_transform_factory.py index 40b4d34..03dea8c 100644 --- a/tests/test_string_component_transform_factory.py +++ b/tests/test_string_component_transform_factory.py @@ -13,8 +13,8 @@ from natsort.ns_enum import NSType, NS_DUMB, ns from natsort.utils import groupletters, string_component_transform_factory # There are some unicode values that are known failures with the builtin locale -# library on BSD systems that has nothing to do with natsort (a ValueError is -# raised by strxfrm). Let's filter them out. +# library on OSX and some other BSD-based systems that has nothing to do with +# natsort (a ValueError is raised by strxfrm). Let's filter them out. try: bad_uni_chars = frozenset(chr(x) for x in range(0x10FEFD, 0x10FFFF + 1)) except ValueError: @@ -34,10 +34,7 @@ def no_null(x: str) -> bool: def input_is_ok_with_locale(x: str) -> bool: """Ensure this input won't cause locale.strxfrm to barf""" - # On FreeBSD, locale.strxfrm raises an OSError on input like 'Å'. - # You read that right - an *OSError* for invalid input. - # We cannot really fix that, so we just filter out any value - # that could cause locale.strxfrm to barf with this function. + # Bad input can cause an OSError if the OS doesn't support the value try: get_strxfrm()(x) except OSError: @@ -91,9 +88,9 @@ def test_string_component_transform_factory( ) -> None: string_component_transform_func = string_component_transform_factory(alg) x = str(x) - assume(input_is_ok_with_locale(x)) # handle broken locale lib on BSD. + assume(input_is_ok_with_locale(x)) try: assert list(string_component_transform_func(x)) == list(example_func(x)) - except ValueError as e: # handle broken locale lib on BSD. + except ValueError as e: # handle broken locale lib on OSX. if "is not in range" not in str(e): raise -- cgit v1.2.1