From 4ae64aa2ee7f254c82ab4700147f7480eff807d7 Mon Sep 17 00:00:00 2001 From: Seth Morton Date: Sun, 26 Feb 2023 23:32:02 -0800 Subject: Enable new, more performant fastnumbers The new version can convert the entire mapping internally, so we use that if possible. A new wrapper for this new function is introduced to make the calling code consistent. --- natsort/compat/fake_fastnumbers.py | 12 +++---- natsort/compat/fastnumbers.py | 42 ++++++++++++++++++++++-- natsort/utils.py | 16 ++++----- tests/test_fake_fastnumbers.py | 18 +++++----- tests/test_parse_string_function.py | 4 +-- tests/test_string_component_transform_factory.py | 25 +++++++------- 6 files changed, 76 insertions(+), 41 deletions(-) diff --git a/natsort/compat/fake_fastnumbers.py b/natsort/compat/fake_fastnumbers.py index 5d44605..cb1c900 100644 --- a/natsort/compat/fake_fastnumbers.py +++ b/natsort/compat/fake_fastnumbers.py @@ -4,7 +4,7 @@ This module is intended to replicate some of the functionality from the fastnumbers module in the event that module is not installed. """ import unicodedata -from typing import Callable, FrozenSet, Optional, Union +from typing import Callable, FrozenSet, Union from natsort.unicode_numbers import decimal_chars @@ -35,11 +35,10 @@ StrOrFloat = Union[str, float] StrOrInt = Union[str, int] -# noinspection PyIncorrectDocstring def fast_float( x: str, - key: Callable[[str], StrOrFloat] = lambda x: x, - nan: Optional[StrOrFloat] = None, + key: Callable[[str], str] = lambda x: x, + nan: float = float("inf"), _uni: Callable[[str, StrOrFloat], StrOrFloat] = unicodedata.numeric, _nan_inf: FrozenSet[str] = NAN_INF, _first_char: FrozenSet[str] = POTENTIAL_FIRST_CHAR, @@ -67,7 +66,7 @@ def fast_float( if x[0] in _first_char or x.lstrip()[:3] in _nan_inf: try: ret = float(x) - return nan if nan is not None and ret != ret else ret + return nan if ret != ret else ret except ValueError: try: return _uni(x, key(x)) if len(x) == 1 else key(x) @@ -80,10 +79,9 @@ def fast_float( return key(x) -# noinspection PyIncorrectDocstring def fast_int( x: str, - key: Callable[[str], StrOrInt] = lambda x: x, + key: Callable[[str], str] = lambda x: x, _uni: Callable[[str, StrOrInt], StrOrInt] = unicodedata.digit, _first_char: FrozenSet[str] = POTENTIAL_FIRST_CHAR, ) -> StrOrInt: diff --git a/natsort/compat/fastnumbers.py b/natsort/compat/fastnumbers.py index 049030d..b4ae5cc 100644 --- a/natsort/compat/fastnumbers.py +++ b/natsort/compat/fastnumbers.py @@ -4,11 +4,17 @@ Interface for natsort to access fastnumbers functions without having to worry if it is actually installed. """ import re +from typing import Callable, Iterable, Iterator, Literal, Tuple, Union -__all__ = ["fast_float", "fast_int"] +StrOrFloat = Union[str, float] +StrOrInt = Union[str, int] +__all__ = ["try_float", "try_int"] -def is_supported_fastnumbers(fastnumbers_version: str) -> bool: + +def is_supported_fastnumbers( + fastnumbers_version: str, minimum: Tuple[int, int, int] = (2, 0, 0) +) -> bool: match = re.match( r"^(\d+)\.(\d+)(\.(\d+))?([ab](\d+))?$", fastnumbers_version, @@ -22,7 +28,7 @@ def is_supported_fastnumbers(fastnumbers_version: str) -> bool: (major, minor, patch) = match.group(1, 2, 4) - return (int(major), int(minor), int(patch)) >= (2, 0, 0) + return (int(major), int(minor), int(patch)) >= minimum # If the user has fastnumbers installed, they will get great speed @@ -34,5 +40,35 @@ try: # Require >= version 2.0.0. if not is_supported_fastnumbers(fn_ver): raise ImportError # pragma: no cover + + # For versions of fastnumbers with mapping capability, use that + if is_supported_fastnumbers(fn_ver, (5, 0, 0)): + del fast_float, fast_int + from fastnumbers import try_float, try_int except ImportError: from natsort.compat.fake_fastnumbers import fast_float, fast_int # type: ignore + +# Re-map the old-or-compatibility functions fast_float/fast_int to the +# newer API of try_float/try_int. If we already imported try_float/try_int +# then there is nothing to do. +if "try_float" not in globals(): + + def try_float( # noqa: F811, type: ignore[no-redef] + x: Iterable[str], + map: Literal[True], + nan: float = float("inf"), + on_fail: Callable[[str], str] = lambda x: x, + ) -> Iterator[StrOrFloat]: + assert map is True + return (fast_float(y, nan=nan, key=on_fail) for y in x) + + +if "try_int" not in globals(): + + def try_int( # noqa: F811, type: ignore[no-redef] + x: Iterable[str], + map: Literal[True], + on_fail: Callable[[str], str] = lambda x: x, + ) -> Iterator[StrOrInt]: + assert map is True + return (fast_int(y, key=on_fail) for y in x) diff --git a/natsort/utils.py b/natsort/utils.py index 062b1c6..b86225e 100644 --- a/natsort/utils.py +++ b/natsort/utils.py @@ -61,7 +61,7 @@ from typing import ( ) from unicodedata import normalize -from natsort.compat.fastnumbers import fast_float, fast_int +from natsort.compat.fastnumbers import try_float, try_int from natsort.compat.locale import ( StrOrBytes, get_decimal_point, @@ -111,7 +111,7 @@ NumTransformer = Callable[[Any], NumTransform] # For the string component transform factory StrBytesNum = Union[str, bytes, float, int] -StrTransformer = Callable[[str], StrBytesNum] +StrTransformer = Callable[[Iterable[str]], Iterator[StrBytesNum]] # For the final data transform factory FinalTransform = AnyTuple @@ -505,7 +505,7 @@ def parse_string_factory( c = compose_input(b) # Decompose unicode if using LOCALE d = splitter(c) # Split string into components. e = filter(None, d) # Remove empty strings. - f = map(component_transform, e) # Apply transform on components. + f = component_transform(e) # Apply transform on components. g = sep_inserter(f, sep) # Insert '' between numbers. return final_transform(g, original) # Apply the final transform. @@ -688,14 +688,14 @@ def string_component_transform_factory(alg: NSType) -> StrTransformer: func_chain.append(get_strxfrm()) # Return the correct chained functions. - kwargs: Dict[str, Union[float, Callable[[str], StrOrBytes]]] - kwargs = {"key": chain_functions(func_chain)} if func_chain else {} + kwargs: Dict[str, Union[float, Callable[[str], StrOrBytes], bool]] + kwargs = {"on_fail": chain_functions(func_chain)} if func_chain else {} + kwargs["map"] = True if alg & ns.FLOAT: - # noinspection PyTypeChecker kwargs["nan"] = nan_val - return cast(Callable[[str], StrOrBytes], partial(fast_float, **kwargs)) + return cast(StrTransformer, partial(try_float, **kwargs)) else: - return cast(Callable[[str], StrOrBytes], partial(fast_int, **kwargs)) + return cast(StrTransformer, partial(try_int, **kwargs)) def final_data_transform_factory( diff --git a/tests/test_fake_fastnumbers.py b/tests/test_fake_fastnumbers.py index 574f7cf..6324c64 100644 --- a/tests/test_fake_fastnumbers.py +++ b/tests/test_fake_fastnumbers.py @@ -4,7 +4,7 @@ Test the fake fastnumbers module. """ import unicodedata -from math import isnan +from math import isinf from typing import Union, cast from hypothesis import given @@ -62,10 +62,10 @@ def test_fast_float_returns_nan_alternate_if_nan_option_is_given() -> None: def test_fast_float_converts_float_string_to_float_example() -> None: assert fast_float("45.8") == 45.8 assert fast_float("-45") == -45.0 - assert fast_float("45.8e-2", key=len) == 45.8e-2 - assert isnan(cast(float, fast_float("nan"))) - assert isnan(cast(float, fast_float("+nan"))) - assert isnan(cast(float, fast_float("-NaN"))) + assert fast_float("45.8e-2", key=lambda x: x.upper()) == 45.8e-2 + assert isinf(cast(float, fast_float("nan"))) + assert isinf(cast(float, fast_float("+nan"))) + assert isinf(cast(float, fast_float("-NaN"))) assert fast_float("۱۲.۱۲") == 12.12 assert fast_float("-۱۲.۱۲") == -12.12 @@ -85,12 +85,12 @@ def test_fast_float_leaves_string_as_is(x: str) -> None: def test_fast_float_with_key_applies_to_string_example() -> None: - assert fast_float("invalid", key=len) == len("invalid") + assert fast_float("invalid", key=lambda x: x.upper()) == "INVALID" @given(text().filter(not_a_float).filter(bool)) def test_fast_float_with_key_applies_to_string(x: str) -> None: - assert fast_float(x, key=len) == len(x) + assert fast_float(x, key=lambda x: x.upper()) == x.upper() def test_fast_int_leaves_float_string_as_is_example() -> None: @@ -126,9 +126,9 @@ def test_fast_int_leaves_string_as_is(x: str) -> None: def test_fast_int_with_key_applies_to_string_example() -> None: - assert fast_int("invalid", key=len) == len("invalid") + assert fast_int("invalid", key=lambda x: x.upper()) == "INVALID" @given(text().filter(not_an_int).filter(bool)) def test_fast_int_with_key_applies_to_string(x: str) -> None: - assert fast_int(x, key=len) == len(x) + assert fast_int(x, key=lambda x: x.upper()) == x.upper() diff --git a/tests/test_parse_string_function.py b/tests/test_parse_string_function.py index 653a065..d2d33a4 100644 --- a/tests/test_parse_string_function.py +++ b/tests/test_parse_string_function.py @@ -7,7 +7,7 @@ from typing import Any, Callable, Iterable, List, Tuple, Union import pytest from hypothesis import given from hypothesis.strategies import floats, integers, lists, text -from natsort.compat.fastnumbers import fast_float +from natsort.compat.fastnumbers import try_float from natsort.ns_enum import NSType, NS_DUMB, ns from natsort.utils import ( FinalTransform, @@ -46,7 +46,7 @@ def parse_string_func_factory(alg: NSType) -> StrParser: sep, NumRegex.int_nosign().split, input_transform, - fast_float, + lambda x: try_float(x, map=True), final_transform, ) diff --git a/tests/test_string_component_transform_factory.py b/tests/test_string_component_transform_factory.py index 99df7ea..78d37bf 100644 --- a/tests/test_string_component_transform_factory.py +++ b/tests/test_string_component_transform_factory.py @@ -7,7 +7,7 @@ from typing import Any, Callable, FrozenSet, Union import pytest from hypothesis import example, given from hypothesis.strategies import floats, integers, text -from natsort.compat.fastnumbers import fast_float, fast_int +from natsort.compat.fastnumbers import try_float, try_int from natsort.compat.locale import get_strxfrm from natsort.ns_enum import NSType, NS_DUMB, ns from natsort.utils import groupletters, string_component_transform_factory @@ -35,25 +35,25 @@ def no_null(x: str) -> bool: @pytest.mark.parametrize( "alg, example_func", [ - (ns.INT, fast_int), - (ns.DEFAULT, fast_int), - (ns.FLOAT, partial(fast_float, nan=float("-inf"))), - (ns.FLOAT | ns.NANLAST, partial(fast_float, nan=float("+inf"))), - (ns.GROUPLETTERS, partial(fast_int, key=groupletters)), - (ns.LOCALE, partial(fast_int, key=lambda x: get_strxfrm()(x))), + (ns.INT, partial(try_int, map=True)), + (ns.DEFAULT, partial(try_int, map=True)), + (ns.FLOAT, partial(try_float, map=True, nan=float("-inf"))), + (ns.FLOAT | ns.NANLAST, partial(try_float, map=True, nan=float("+inf"))), + (ns.GROUPLETTERS, partial(try_int, map=True, on_fail=groupletters)), + (ns.LOCALE, partial(try_int, map=True, on_fail=lambda x: get_strxfrm()(x))), ( ns.GROUPLETTERS | ns.LOCALE, - partial(fast_int, key=lambda x: get_strxfrm()(groupletters(x))), + partial(try_int, map=True, on_fail=lambda x: get_strxfrm()(groupletters(x))), ), ( NS_DUMB | ns.LOCALE, - partial(fast_int, key=lambda x: get_strxfrm()(groupletters(x))), + partial(try_int, map=True, on_fail=lambda x: get_strxfrm()(groupletters(x))), ), ( ns.GROUPLETTERS | ns.LOCALE | ns.FLOAT | ns.NANLAST, partial( - fast_float, - key=lambda x: get_strxfrm()(groupletters(x)), + try_float, map=True, + on_fail=lambda x: get_strxfrm()(groupletters(x)), nan=float("+inf"), ), ), @@ -70,8 +70,9 @@ def test_string_component_transform_factory( x: Union[str, float, int], alg: NSType, example_func: Callable[[str], Any] ) -> None: string_component_transform_func = string_component_transform_factory(alg) + x = str(x) try: - assert string_component_transform_func(str(x)) == example_func(str(x)) + assert list(string_component_transform_func(x)) == list(example_func(x)) except ValueError as e: # handle broken locale lib on BSD. if "is not in range" not in str(e): raise -- cgit v1.2.1