diff options
author | Seth M Morton <seth.m.morton@gmail.com> | 2018-02-08 23:10:30 -0800 |
---|---|---|
committer | Seth M Morton <seth.m.morton@gmail.com> | 2018-02-10 13:25:08 -0800 |
commit | d432715ef68470a45561e14c9e3e194bef97e525 (patch) | |
tree | 9574eb725ec6a689207d34b330eebbfdfaf27f2b | |
parent | 48b50476044860764f2e4cd1f76e0e152940abdc (diff) | |
download | natsort-d432715ef68470a45561e14c9e3e194bef97e525.tar.gz |
Add code to support placing numbers after non-numbers.
All infrastructure has been added, but no tests have yet been
inserted to verify that the code is working properly. That will come
in the next commit.
-rw-r--r-- | natsort/compat/locale.py | 30 | ||||
-rw-r--r-- | natsort/natsort.py | 18 | ||||
-rw-r--r-- | natsort/ns_enum.py | 4 | ||||
-rw-r--r-- | natsort/utils.py | 10 | ||||
-rw-r--r-- | test_natsort/test_final_data_transform_factory.py | 20 | ||||
-rw-r--r-- | test_natsort/test_natsort_key.py | 10 | ||||
-rw-r--r-- | test_natsort/test_parse_number_function.py | 20 | ||||
-rw-r--r-- | test_natsort/test_utils.py | 1 |
8 files changed, 76 insertions, 37 deletions
diff --git a/natsort/compat/locale.py b/natsort/compat/locale.py index cbed495..a1cfa5a 100644 --- a/natsort/compat/locale.py +++ b/natsort/compat/locale.py @@ -6,8 +6,22 @@ from __future__ import ( absolute_import ) +# Std. lib imports. +import sys + # Local imports. -from natsort.compat.py23 import PY_VERSION, cmp_to_key +from natsort.compat.py23 import ( + PY_VERSION, + cmp_to_key, + py23_unichr, + py23_cmp, +) + +# This string should be sorted after any other byte string because +# it contains the max unicode character repeated 20 times. +# You would need some odd data to come after that. +null_string = '' +null_string_max = py23_unichr(sys.maxunicode) * 20 # Make the strxfrm function from strcoll on Python2 # It can be buggy (especially on BSD-based systems), @@ -18,6 +32,11 @@ try: null_string_locale = b'' + # This string should in theory be sorted after any other byte + # string because it contains the max byte char repeated many times. + # You would need some odd data to come after that. + null_string_locale_max = b'x7f' * 50 + def dumb_sort(): return False @@ -49,8 +68,15 @@ except ImportError: null_string_locale = '' + # This string should be sorted after any other byte string because + # it contains the max unicode character repeated 20 times. + # You would need some odd data to come after that. + null_string_locale_max = py23_unichr(sys.maxunicode) * 20 + if PY_VERSION < 3: - null_string_locale = cmp_to_key(cmp)(null_string_locale) + null_string_locale = cmp_to_key(py23_cmp)(null_string_locale) + null_string_locale_max = cmp_to_key(py23_cmp)(null_string_locale_max) + # On some systems, locale is broken and does not sort in the expected # order. We will try to detect this and compensate. 
def dumb_sort(): diff --git a/natsort/natsort.py b/natsort/natsort.py index 8703498..557e07b 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -207,16 +207,24 @@ def natsort_keygen(key=None, alg=0, **_kwargs): alg |= ns._DUMB # Set some variables that will be passed to the factory functions - if alg & ns.LOCALEALPHA: - sep = natsort.compat.locale.null_string_locale + if alg & ns.NUMAFTER: + if alg & ns.LOCALEALPHA: + sep = natsort.compat.locale.null_string_locale_max + else: + sep = natsort.compat.locale.null_string_max + pre_sep = natsort.compat.locale.null_string_max else: - sep = '' + if alg & ns.LOCALEALPHA: + sep = natsort.compat.locale.null_string_locale + else: + sep = natsort.compat.locale.null_string + pre_sep = natsort.compat.locale.null_string regex = _regex_chooser[alg & ns._NUMERIC_ONLY] # Create the functions that will be used to split strings. input_transform = _input_string_transform_factory(alg) component_transform = _string_component_transform_factory(alg) - final_transform = _final_data_transform_factory(alg, sep) + final_transform = _final_data_transform_factory(alg, sep, pre_sep) # Create the high-level parsing functions for strings, bytes, and numbers. string_func = _parse_string_factory( @@ -226,7 +234,7 @@ def natsort_keygen(key=None, alg=0, **_kwargs): if alg & ns.PATH: string_func = _parse_path_factory(string_func) bytes_func = _parse_bytes_factory(alg) - num_func = _parse_number_factory(alg, sep) + num_func = _parse_number_factory(alg, sep, pre_sep) # Return the natsort key with the parsing path pre-chosen. return partial( diff --git a/natsort/ns_enum.py b/natsort/ns_enum.py index 37a00de..4216ddd 100644 --- a/natsort/ns_enum.py +++ b/natsort/ns_enum.py @@ -42,6 +42,9 @@ class ns(object): Tell `natsort` to not search for exponents as part of a float number. For example, with `NOEXP` the number "5.6E5" would be interpreted as `5.6`, `"E"`, and `5` instead of `560000`. 
+ NUMAFTER, NA + Tell `natsort` to sort numbers after non-numbers. By default + numbers will be ordered before non-numbers. PATH, P Tell `natsort` to interpret strings as filesystem paths, so they will be split according to the filesystem separator @@ -151,6 +154,7 @@ class ns(object): CAPITALFIRST = C = UNGROUPLETTERS NANLAST = NL = 1 << 10 COMPATIBILITYNORMALIZE = CN = 1 << 11 + NUMAFTER = NA = 1 << 12 # The below are private options for internal use only. _NUMERIC_ONLY = REAL | NOEXP diff --git a/natsort/utils.py b/natsort/utils.py index c33de1d..b6484b0 100644 --- a/natsort/utils.py +++ b/natsort/utils.py @@ -197,7 +197,7 @@ def _parse_bytes_factory(alg): return lambda x: (x,) -def _parse_number_factory(alg, sep): +def _parse_number_factory(alg, sep, pre_sep): """Create a function that will properly format a number in a tuple.""" nan_replace = float('+inf') if alg & ns.NANLAST else float('-inf') @@ -207,9 +207,9 @@ def _parse_number_factory(alg, sep): # Return the function, possibly wrapping in tuple if PATH is selected. if alg & ns.PATH and alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA: - return lambda x: ((('',), func(x)),) + return lambda x: (((pre_sep,), func(x)),) elif alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA: - return lambda x: (('',), func(x)) + return lambda x: ((pre_sep,), func(x)) elif alg & ns.PATH: return lambda x: (func(x),) else: @@ -363,7 +363,7 @@ def _string_component_transform_factory(alg): return partial(fast_int, **kwargs) -def _final_data_transform_factory(alg, sep): +def _final_data_transform_factory(alg, sep, pre_sep): """ Given a set of natsort algorithms, return the function to operate on the post-parsed strings according to the user's request. 
@@ -383,7 +383,7 @@ def _final_data_transform_factory(alg, sep): if not split_val: return (), () elif split_val[0] == sep: - return ('',), split_val + return (pre_sep,), split_val else: return (transform(val[0]),), split_val return func diff --git a/test_natsort/test_final_data_transform_factory.py b/test_natsort/test_final_data_transform_factory.py index bfd3dfd..f0207e6 100644 --- a/test_natsort/test_final_data_transform_factory.py +++ b/test_natsort/test_final_data_transform_factory.py @@ -20,38 +20,38 @@ from hypothesis.strategies import ( def test_final_data_transform_factory_with_iterable_returns_tuple_with_no_options_example(): - assert _final_data_transform_factory(0, '')(iter([7]), '') == (7,) + assert _final_data_transform_factory(0, '', '')(iter([7]), '') == (7,) @given(text()) def test_final_data_transform_factory_with_iterable_returns_tuple_with_no_options(x): - assert _final_data_transform_factory(0, '')(iter([x]), '') == (x,) + assert _final_data_transform_factory(0, '', '')(iter([x]), '') == (x,) # UNGROUPLETTERS without LOCALE does nothing, as does LOCALE without UNGROUPLETTERS - assert _final_data_transform_factory(ns.UNGROUPLETTERS, '')(iter([x]), '') == _final_data_transform_factory(0, '')(iter([x]), '') - assert _final_data_transform_factory(ns.LOCALE, '')(iter([x]), '') == _final_data_transform_factory(0, '')(iter([x]), '') + assert _final_data_transform_factory(ns.UNGROUPLETTERS, '', '')(iter([x]), '') == _final_data_transform_factory(0, '', '')(iter([x]), '') + assert _final_data_transform_factory(ns.LOCALE, '', '')(iter([x]), '') == _final_data_transform_factory(0, '', '')(iter([x]), '') def test_final_data_transform_factory_with_empty_tuple_returns_double_empty_tuple(): - assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS, '')((), '') == ((), ()) + assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS, '', '')((), '') == ((), ()) def 
test_final_data_transform_factory_with_null_string_first_element_adds_empty_string_on_first_tuple_element(): - assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS, '')(('', 60), '') == (('',), ('', 60)) + assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS, '', 'xx')(('', 60), '') == (('xx',), ('', 60)) def test_final_data_transform_factory_returns_first_element_in_first_tuple_element_example(): - assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS, '')(('this', 60), 'this60') == (('t',), ('this', 60)) + assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS, '', '')(('this', 60), 'this60') == (('t',), ('this', 60)) @given(x=text().filter(bool), y=floats(allow_nan=False, allow_infinity=False) | integers()) def test_final_data_transform_factory_returns_first_element_in_first_tuple_element(x, y): - assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS, '')((x, y), ''.join(map(py23_str, [x, y]))) == ((x[0],), (x, y)) + assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS, '', '')((x, y), ''.join(map(py23_str, [x, y]))) == ((x[0],), (x, y)) def test_final_data_transform_factory_returns_first_element_in_first_tuple_element_caseswapped_with_DUMB_and_LOWERCASEFIRST_example(): - assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS | ns._DUMB | ns.LOWERCASEFIRST, '')(('this', 60), 'this60') == (('T',), ('this', 60)) + assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS | ns._DUMB | ns.LOWERCASEFIRST, '', '')(('this', 60), 'this60') == (('T',), ('this', 60)) @given(x=text().filter(bool), y=floats(allow_nan=False, allow_infinity=False) | integers()) def test_final_data_transform_factory_returns_first_element_in_first_tuple_element_caseswapped_with_DUMB_and_LOWERCASEFIRST(x, y): - assert _final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS | ns._DUMB | ns.LOWERCASEFIRST, '')((x, y), ''.join(map(py23_str, [x, y]))) == ((x[0].swapcase(),), (x, y)) + assert 
_final_data_transform_factory(ns.LOCALE | ns.UNGROUPLETTERS | ns._DUMB | ns.LOWERCASEFIRST, '', '')((x, y), ''.join(map(py23_str, [x, y]))) == ((x[0].swapcase(),), (x, y)) diff --git a/test_natsort/test_natsort_key.py b/test_natsort/test_natsort_key.py index a675cb2..9aabd11 100644 --- a/test_natsort/test_natsort_key.py +++ b/test_natsort/test_natsort_key.py @@ -34,10 +34,10 @@ if PY_VERSION >= 3: regex = _regex_chooser[ns.INT] pre = _input_string_transform_factory(ns.INT) post = _string_component_transform_factory(ns.INT) -after = _final_data_transform_factory(ns.INT, '') +after = _final_data_transform_factory(ns.INT, '', '') string_func = _parse_string_factory(ns.INT, '', regex.split, pre, post, after) bytes_func = _parse_bytes_factory(ns.INT) -num_func = _parse_number_factory(ns.INT, '') +num_func = _parse_number_factory(ns.INT, '', '') def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple(): @@ -45,7 +45,7 @@ def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple # so it will sort against the other as_path results. sfunc = _parse_path_factory(string_func) bytes_func = _parse_bytes_factory(ns.PATH) - num_func = _parse_number_factory(ns.PATH, '') + num_func = _parse_number_factory(ns.PATH, '', '') assert _natsort_key(10, None, sfunc, bytes_func, num_func) == (('', 10),) @@ -55,7 +55,7 @@ def test__natsort_key_with_bytes_input_and_PATH_returns_number_in_nested_tuple() # so it will sort against the other as_path results. sfunc = _parse_path_factory(string_func) bytes_func = _parse_bytes_factory(ns.PATH) - num_func = _parse_number_factory(ns.PATH, '') + num_func = _parse_number_factory(ns.PATH, '', '') assert _natsort_key(b'/hello/world', None, sfunc, bytes_func, num_func) == ((b'/hello/world',),) @@ -63,7 +63,7 @@ def test__natsort_key_with_tuple_of_paths_and_PATH_returns_triply_nested_tuple() # PATH also handles recursion well. 
sfunc = _parse_path_factory(string_func) bytes_func = _parse_bytes_factory(ns.PATH) - num_func = _parse_number_factory(ns.PATH, '') + num_func = _parse_number_factory(ns.PATH, '', '') assert _natsort_key(('/Folder', '/Folder (1)'), None, sfunc, bytes_func, num_func) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1, ')'))) diff --git a/test_natsort/test_parse_number_function.py b/test_natsort/test_parse_number_function.py index 163d066..2e7a9fe 100644 --- a/test_natsort/test_parse_number_function.py +++ b/test_natsort/test_parse_number_function.py @@ -18,38 +18,38 @@ from hypothesis.strategies import ( def test_parse_number_factory_makes_function_that_returns_tuple_example(): - assert _parse_number_factory(0, '')(57) == ('', 57) - assert _parse_number_factory(0, '')(float('nan')) == ('', float('-inf')) - assert _parse_number_factory(ns.NANLAST, '')(float('nan')) == ('', float('+inf')) + assert _parse_number_factory(0, '', '')(57) == ('', 57) + assert _parse_number_factory(0, '', '')(float('nan')) == ('', float('-inf')) + assert _parse_number_factory(ns.NANLAST, '', '')(float('nan')) == ('', float('+inf')) @given(floats(allow_nan=False) | integers()) def test_parse_number_factory_makes_function_that_returns_tuple(x): - assert _parse_number_factory(0, '')(x) == ('', x) + assert _parse_number_factory(0, '', '')(x) == ('', x) def test_parse_number_factory_with_PATH_makes_function_that_returns_nested_tuple_example(): - assert _parse_number_factory(ns.PATH, '')(57) == (('', 57),) + assert _parse_number_factory(ns.PATH, '', '')(57) == (('', 57),) @given(floats(allow_nan=False) | integers()) def test_parse_number_factory_with_PATH_makes_function_that_returns_nested_tuple(x): - assert _parse_number_factory(ns.PATH, '')(x) == (('', x),) + assert _parse_number_factory(ns.PATH, '', '')(x) == (('', x),) def test_parse_number_factory_with_UNGROUPLETTERS_LOCALE_makes_function_that_returns_nested_tuple_example(): - assert _parse_number_factory(ns.UNGROUPLETTERS | ns.LOCALE, 
'')(57) == (('',), ('', 57)) + assert _parse_number_factory(ns.UNGROUPLETTERS | ns.LOCALE, '', 'xx')(57) == (('xx',), ('', 57)) @given(floats(allow_nan=False) | integers()) def test_parse_number_factory_with_UNGROUPLETTERS_LOCALE_makes_function_that_returns_nested_tuple(x): - assert _parse_number_factory(ns.UNGROUPLETTERS | ns.LOCALE, '')(x) == (('',), ('', x)) + assert _parse_number_factory(ns.UNGROUPLETTERS | ns.LOCALE, '', 'xx')(x) == (('xx',), ('', x)) def test_parse_number_factory_with_PATH_UNGROUPLETTERS_LOCALE_makes_function_that_returns_nested_tuple_example(): - assert _parse_number_factory(ns.PATH | ns.UNGROUPLETTERS | ns.LOCALE, '')(57) == ((('',), ('', 57)),) + assert _parse_number_factory(ns.PATH | ns.UNGROUPLETTERS | ns.LOCALE, '', 'xx')(57) == ((('xx',), ('', 57)),) @given(floats(allow_nan=False) | integers()) def test_parse_number_factory_with_PATH_UNGROUPLETTERS_LOCALE_makes_function_that_returns_nested_tuple(x): - assert _parse_number_factory(ns.PATH | ns.UNGROUPLETTERS | ns.LOCALE, '')(x) == ((('',), ('', x)),) + assert _parse_number_factory(ns.PATH | ns.UNGROUPLETTERS | ns.LOCALE, '', 'xx')(x) == ((('xx',), ('', x)),) diff --git a/test_natsort/test_utils.py b/test_natsort/test_utils.py index 3a367cb..10ad0fa 100644 --- a/test_natsort/test_utils.py +++ b/test_natsort/test_utils.py @@ -149,6 +149,7 @@ def test_ns_enum_values_have_are_as_expected(): assert ns.UNGROUPLETTERS == ns.CAPITALFIRST assert ns.NANLAST == ns.NL assert ns.COMPATIBILITYNORMALIZE == ns.CN + assert ns.NUMAFTER == ns.NA # Convenience assert ns.LOCALE == ns.LOCALEALPHA | ns.LOCALENUM |