diff options
author | Seth M Morton <seth.m.morton@gmail.com> | 2016-05-05 17:09:06 -0700 |
---|---|---|
committer | Seth M Morton <seth.m.morton@gmail.com> | 2016-05-05 17:09:06 -0700 |
commit | 3df99572f21d82975ea4dff60ad2695b9c0e1504 (patch) | |
tree | 60c571b1f7fdcbcc4a8b79ecc69adf4a12c91ec4 | |
parent | 06c917cf795fd55f188238afbe24ffdb9c215309 (diff) | |
download | natsort-3df99572f21d82975ea4dff60ad2695b9c0e1504.tar.gz |
Separated LOCALE into LOCALEALPHA and LOCALENUM.
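This lets users choose whether locale-dependent numeric handling (decimal and thousands separators) is applied, independently of locale-aware sorting of alphabetical characters.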
-rw-r--r-- | natsort/natsort.py | 4 | ||||
-rw-r--r-- | natsort/ns_enum.py | 35 | ||||
-rw-r--r-- | natsort/utils.py | 43 | ||||
-rw-r--r-- | test_natsort/test_natsort_keygen.py | 12 | ||||
-rw-r--r-- | test_natsort/test_utils.py | 3 |
5 files changed, 57 insertions, 40 deletions
diff --git a/natsort/natsort.py b/natsort/natsort.py index 7239b2f..3a5caec 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -202,11 +202,11 @@ def natsort_keygen(key=None, alg=0, **_kwargs): raise ValueError(msg+', got {0}'.format(py23_str(alg))) # Add the _DUMB option if the locale library is broken. - if alg & ns.LOCALE and natsort.compat.locale.dumb_sort(): + if alg & ns.LOCALEALPHA and natsort.compat.locale.dumb_sort(): alg |= ns._DUMB # Set some variable that will be passed to the factory functions - sep = natsort.compat.locale.null_string if alg & ns.LOCALE else '' + sep = natsort.compat.locale.null_string if alg & ns.LOCALEALPHA else '' regex = _regex_chooser[alg & ns._NUMERIC_ONLY] # Create the functions that will be used to split strings. diff --git a/natsort/ns_enum.py b/natsort/ns_enum.py index a4e1c44..b88bdba 100644 --- a/natsort/ns_enum.py +++ b/natsort/ns_enum.py @@ -65,12 +65,18 @@ class ns(object): front. It is the same as setting the old `as_path` option to `True`. LOCALE, L - Tell `natsort` to be locale-aware when sorting strings (everything - that was not converted to a number). Your sorting results will vary - depending on your current locale. Generally, the `GROUPLETTERS` - option is not needed with `LOCALE` because the `locale` library - groups the letters in the same manner (although you may still - need `GROUPLETTERS` if there are numbers in your strings). + Tell `natsort` to be locale-aware when sorting. This includes both + proper sorting of alphabetical characters as well as proper + handling of locale-dependent decimal separators and thousands + separators. This is a shortcut for + ``ns.LOCALEALPHA | ns.LOCALENUM``. + Your sorting results will vary depending on your current locale. + LOCALEALPHA, LA + Tell `natsort` to be locale-aware when sorting, but only for + alphabetical characters. + LOCALENUM, LN + Tell `natsort` to be locale-aware when sorting, but only for + decimal separators and thousands separators. 
IGNORECASE, IC Tell `natsort` to ignore case when sorting. For example, ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as @@ -93,7 +99,8 @@ class ns(object): ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as ``['Apple', 'apple', 'Banana', 'banana']``. Useless when used with `IGNORECASE`; use with `LOWERCASEFIRST` - to reverse the order of upper and lower case. + to reverse the order of upper and lower case. Generally not + needed with `LOCALE`. CAPITALFIRST, C Only used when `LOCALE` is enabled. Tell `natsort` to put all capitalized words before non-capitalized words. This is essentially @@ -148,13 +155,15 @@ class ns(object): REAL = R = FLOAT | SIGNED NOEXP = N = 1 << 2 PATH = P = 1 << 3 - LOCALE = L = 1 << 4 - IGNORECASE = IC = 1 << 5 - LOWERCASEFIRST = LF = 1 << 6 - GROUPLETTERS = G = 1 << 7 - UNGROUPLETTERS = UG = 1 << 8 + LOCALEALPHA = LA = 1 << 4 + LOCALENUM = LN = 1 << 5 + LOCALE = L = LOCALEALPHA | LOCALENUM + IGNORECASE = IC = 1 << 6 + LOWERCASEFIRST = LF = 1 << 7 + GROUPLETTERS = G = 1 << 8 + UNGROUPLETTERS = UG = 1 << 9 CAPITALFIRST = C = UNGROUPLETTERS - NANLAST = NL = 1 << 9 + NANLAST = NL = 1 << 10 # The below are private options for internal use only. _NUMERIC_ONLY = REAL | NOEXP diff --git a/natsort/utils.py b/natsort/utils.py index 98e6ea1..23f83b2 100644 --- a/natsort/utils.py +++ b/natsort/utils.py @@ -124,7 +124,7 @@ def _natsort_key(val, key, string_func, bytes_func, num_func): def _parse_bytes_function(alg): """Create a function that will format a bytes string in a tuple.""" - # We don't worry about ns.UNGROUPLETTERS | ns.LOCALE because + # We don't worry about ns.UNGROUPLETTERS | ns.LOCALEALPHA because # bytes cannot be compared to strings. if alg & ns.PATH and alg & ns.IGNORECASE: return lambda x: ((x.lower(),),) @@ -145,9 +145,9 @@ def _parse_number_function(alg, sep): return (sep, nan_replace if val != val else val) # Return the function, possibly wrapping in tuple if PATH is selected. 
- if alg & ns.PATH and alg & ns.UNGROUPLETTERS and alg & ns.LOCALE: + if alg & ns.PATH and alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA: return lambda x: ((('',), func(x)),) - elif alg & ns.UNGROUPLETTERS and alg & ns.LOCALE: + elif alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA: return lambda x: (('',), func(x)) elif alg & ns.PATH: return lambda x: (func(x),) @@ -157,7 +157,7 @@ def _parse_number_function(alg, sep): def _parse_string_function(alg, sep, splitter, pre, post, after): """Create a function that will properly split and format a string.""" - if not (alg & ns._DUMB and alg & ns.LOCALE): + if not (alg & ns._DUMB and alg & ns.LOCALEALPHA): def func(x): x = pre(x) # Apply pre-splitting function original = x @@ -216,21 +216,6 @@ def _pre_split_function(alg): lowfirst = alg & ns.LOWERCASEFIRST dumb = alg & ns._DUMB - # Create a regular expression that will change the decimal point to - # a period if not already a period. - decimal = get_decimal_point() - - switch_decimal = r'(?<=[0-9]){decimal}|{decimal}(?=[0-9])' - switch_decimal = switch_decimal.format(decimal=decimal) - switch_decimal = re.compile(switch_decimal) - - # Create a regular expression that will remove thousands seprarators. - thousands = get_thousands_sep() - strip_thousands = (r'(?<![0-9]{{4}})(?<=[0-9]{{1}})' - r'{thousands}(?=[0-9]{{3}}([^0-9]|$))') - strip_thousands = strip_thousands.format(thousands=thousands) - strip_thousands = re.compile(strip_thousands) - # Build the chain of functions to execute in order. function_chain = [] if (dumb and not lowfirst) or (lowfirst and not dumb): @@ -240,9 +225,23 @@ def _pre_split_function(alg): function_chain.append(methodcaller('casefold')) else: function_chain.append(methodcaller('lower')) - if alg & ns.LOCALE: + + if alg & ns.LOCALENUM: + # Create a regular expression that will remove thousands seprarators. 
+ thousands = get_thousands_sep() + strip_thousands = (r'(?<![0-9]{{4}})(?<=[0-9]{{1}})' + r'{thousands}(?=[0-9]{{3}}([^0-9]|$))') + strip_thousands = strip_thousands.format(thousands=thousands) + strip_thousands = re.compile(strip_thousands) function_chain.append(partial(strip_thousands.sub, '')) + + # Create a regular expression that will change the decimal point to + # a period if not already a period. + decimal = get_decimal_point() if decimal != '.': + switch_decimal = r'(?<=[0-9]){decimal}|{decimal}(?=[0-9])' + switch_decimal = switch_decimal.format(decimal=decimal) + switch_decimal = re.compile(switch_decimal) function_chain.append(partial(switch_decimal.sub, '.')) # Return the chained functions. @@ -255,7 +254,7 @@ def _post_split_function(alg): on the post-split strings according to the user's request. """ # Shortcuts. - use_locale = alg & ns.LOCALE + use_locale = alg & ns.LOCALEALPHA dumb = alg & ns._DUMB group_letters = (alg & ns.GROUPLETTERS) or (use_locale and dumb) nan_val = float('+inf') if alg & ns.NANLAST else float('-inf') @@ -281,7 +280,7 @@ def _post_string_parse_function(alg, sep): Given a set of natsort algorithms, return the function to operate on the post-parsed strings according to the user's request. """ - if alg & ns.UNGROUPLETTERS and alg & ns.LOCALE: + if alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA: swap = alg & ns._DUMB and alg & ns.LOWERCASEFIRST def func(split_val, diff --git a/test_natsort/test_natsort_keygen.py b/test_natsort/test_natsort_keygen.py index 3f48925..e94d3d3 100644 --- a/test_natsort/test_natsort_keygen.py +++ b/test_natsort/test_natsort_keygen.py @@ -6,6 +6,7 @@ See the README or the natsort homepage for more details. 
from __future__ import unicode_literals, print_function import warnings +import locale from pytest import raises from natsort import ( natsorted, @@ -19,6 +20,7 @@ from natsort.compat.locale import ( get_strxfrm, ) from compat.mock import patch +from compat.locale import load_locale INPUT = ['6A-5.034e+1', '/Folder (1)/Foo', 56.7] @@ -74,19 +76,23 @@ def test_natsort_keygen_splits_input_with_lowercasefirst_noexp_float(): def test_natsort_keygen_splits_input_with_locale(): + load_locale('en_US') strxfrm = get_strxfrm() with patch('natsort.compat.locale.dumb_sort', return_value=False): assert natsort_keygen(alg=ns.L)(INPUT) == ((null_string, 6, strxfrm('A-'), 5, strxfrm('.'), 34, strxfrm('e+'), 1), (strxfrm('/Folder ('), 1, strxfrm(')/Foo')), (null_string, 56.7)) with patch('natsort.compat.locale.dumb_sort', return_value=True): assert natsort_keygen(alg=ns.L)(INPUT) == ((null_string, 6, strxfrm('aa--'), 5, strxfrm('..'), 34, strxfrm('eE++'), 1), (strxfrm('//ffoOlLdDeErR (('), 1, strxfrm('))//ffoOoO')), (null_string, 56.7)) - if PY_VERSION >= 3: assert natsort_keygen(alg=ns.L)(b'6A-5.034e+1') == (b'6A-5.034e+1',) + if PY_VERSION >= 3: assert natsort_keygen(alg=ns.LA)(b'6A-5.034e+1') == (b'6A-5.034e+1',) + locale.setlocale(locale.LC_ALL, str('')) def test_natsort_keygen_splits_input_with_locale_and_capitalfirst(): + load_locale('en_US') strxfrm = get_strxfrm() with patch('natsort.compat.locale.dumb_sort', return_value=False): - assert natsort_keygen(alg=ns.L | ns.C)(INPUT) == ((('',), (null_string, 6, strxfrm('A-'), 5, strxfrm('.'), 34, strxfrm('e+'), 1)), (('/',), (strxfrm('/Folder ('), 1, strxfrm(')/Foo'))), (('',), (null_string, 56.7))) - if PY_VERSION >= 3: assert natsort_keygen(alg=ns.L | ns.C)(b'6A-5.034e+1') == (b'6A-5.034e+1',) + assert natsort_keygen(alg=ns.LA | ns.C)(INPUT) == ((('',), (null_string, 6, strxfrm('A-'), 5, strxfrm('.'), 34, strxfrm('e+'), 1)), (('/',), (strxfrm('/Folder ('), 1, strxfrm(')/Foo'))), (('',), (null_string, 56.7))) + if PY_VERSION >= 
3: assert natsort_keygen(alg=ns.LA | ns.C)(b'6A-5.034e+1') == (b'6A-5.034e+1',) + locale.setlocale(locale.LC_ALL, str('')) def test_natsort_keygen_splits_input_with_path(): diff --git a/test_natsort/test_utils.py b/test_natsort/test_utils.py index 406eed8..1e04573 100644 --- a/test_natsort/test_utils.py +++ b/test_natsort/test_utils.py @@ -139,6 +139,8 @@ def test_ns_enum_values_have_are_as_expected(): assert ns.SIGNED == ns.S assert ns.NOEXP == ns.N assert ns.PATH == ns.P + assert ns.LOCALEALPHA == ns.LA + assert ns.LOCALENUM == ns.LN assert ns.LOCALE == ns.L assert ns.IGNORECASE == ns.IC assert ns.LOWERCASEFIRST == ns.LF @@ -149,6 +151,7 @@ def test_ns_enum_values_have_are_as_expected(): assert ns.NANLAST == ns.NL # Convenience + assert ns.LOCALE == ns.LOCALEALPHA | ns.LOCALENUM assert ns.REAL == ns.FLOAT | ns.SIGNED assert ns._NUMERIC_ONLY == ns.REAL | ns.NOEXP |