summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSeth M Morton <seth.m.morton@gmail.com>2016-05-05 17:09:06 -0700
committerSeth M Morton <seth.m.morton@gmail.com>2016-05-05 17:09:06 -0700
commit3df99572f21d82975ea4dff60ad2695b9c0e1504 (patch)
tree60c571b1f7fdcbcc4a8b79ecc69adf4a12c91ec4
parent06c917cf795fd55f188238afbe24ffdb9c215309 (diff)
downloadnatsort-3df99572f21d82975ea4dff60ad2695b9c0e1504.tar.gz
Separated LOCALE into LOCALEALPHA and LOCALENUM.
This is so users can control if they want numeric modifications or not.
-rw-r--r--natsort/natsort.py4
-rw-r--r--natsort/ns_enum.py35
-rw-r--r--natsort/utils.py43
-rw-r--r--test_natsort/test_natsort_keygen.py12
-rw-r--r--test_natsort/test_utils.py3
5 files changed, 57 insertions, 40 deletions
diff --git a/natsort/natsort.py b/natsort/natsort.py
index 7239b2f..3a5caec 100644
--- a/natsort/natsort.py
+++ b/natsort/natsort.py
@@ -202,11 +202,11 @@ def natsort_keygen(key=None, alg=0, **_kwargs):
raise ValueError(msg+', got {0}'.format(py23_str(alg)))
# Add the _DUMB option if the locale library is broken.
- if alg & ns.LOCALE and natsort.compat.locale.dumb_sort():
+ if alg & ns.LOCALEALPHA and natsort.compat.locale.dumb_sort():
alg |= ns._DUMB
# Set some variable that will be passed to the factory functions
- sep = natsort.compat.locale.null_string if alg & ns.LOCALE else ''
+ sep = natsort.compat.locale.null_string if alg & ns.LOCALEALPHA else ''
regex = _regex_chooser[alg & ns._NUMERIC_ONLY]
# Create the functions that will be used to split strings.
diff --git a/natsort/ns_enum.py b/natsort/ns_enum.py
index a4e1c44..b88bdba 100644
--- a/natsort/ns_enum.py
+++ b/natsort/ns_enum.py
@@ -65,12 +65,18 @@ class ns(object):
front. It is the same as setting the old `as_path` option to
`True`.
LOCALE, L
- Tell `natsort` to be locale-aware when sorting strings (everything
- that was not converted to a number). Your sorting results will vary
- depending on your current locale. Generally, the `GROUPLETTERS`
- option is not needed with `LOCALE` because the `locale` library
- groups the letters in the same manner (although you may still
- need `GROUPLETTERS` if there are numbers in your strings).
+ Tell `natsort` to be locale-aware when sorting. This includes both
+ proper sorting of alphabetical characters as well as proper
+ handling of locale-dependent decimal separators and thousands
+ separators. This is a shortcut for
+ ``ns.LOCALEALPHA | ns.LOCALENUM``.
+ Your sorting results will vary depending on your current locale.
+ LOCALEALPHA, LA
+ Tell `natsort` to be locale-aware when sorting, but only for
+ alphabetical characters.
+ LOCALENUM, LN
+ Tell `natsort` to be locale-aware when sorting, but only for
+ decimal separators and thousands separators.
IGNORECASE, IC
Tell `natsort` to ignore case when sorting. For example,
``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as
@@ -93,7 +99,8 @@ class ns(object):
``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as
``['Apple', 'apple', 'Banana', 'banana']``.
Useless when used with `IGNORECASE`; use with `LOWERCASEFIRST`
- to reverse the order of upper and lower case.
+ to reverse the order of upper and lower case. Generally not
+ needed with `LOCALE`.
CAPITALFIRST, C
Only used when `LOCALE` is enabled. Tell `natsort` to put all
capitalized words before non-capitalized words. This is essentially
@@ -148,13 +155,15 @@ class ns(object):
REAL = R = FLOAT | SIGNED
NOEXP = N = 1 << 2
PATH = P = 1 << 3
- LOCALE = L = 1 << 4
- IGNORECASE = IC = 1 << 5
- LOWERCASEFIRST = LF = 1 << 6
- GROUPLETTERS = G = 1 << 7
- UNGROUPLETTERS = UG = 1 << 8
+ LOCALEALPHA = LA = 1 << 4
+ LOCALENUM = LN = 1 << 5
+ LOCALE = L = LOCALEALPHA | LOCALENUM
+ IGNORECASE = IC = 1 << 6
+ LOWERCASEFIRST = LF = 1 << 7
+ GROUPLETTERS = G = 1 << 8
+ UNGROUPLETTERS = UG = 1 << 9
CAPITALFIRST = C = UNGROUPLETTERS
- NANLAST = NL = 1 << 9
+ NANLAST = NL = 1 << 10
# The below are private options for internal use only.
_NUMERIC_ONLY = REAL | NOEXP
diff --git a/natsort/utils.py b/natsort/utils.py
index 98e6ea1..23f83b2 100644
--- a/natsort/utils.py
+++ b/natsort/utils.py
@@ -124,7 +124,7 @@ def _natsort_key(val, key, string_func, bytes_func, num_func):
def _parse_bytes_function(alg):
"""Create a function that will format a bytes string in a tuple."""
- # We don't worry about ns.UNGROUPLETTERS | ns.LOCALE because
+ # We don't worry about ns.UNGROUPLETTERS | ns.LOCALEALPHA because
# bytes cannot be compared to strings.
if alg & ns.PATH and alg & ns.IGNORECASE:
return lambda x: ((x.lower(),),)
@@ -145,9 +145,9 @@ def _parse_number_function(alg, sep):
return (sep, nan_replace if val != val else val)
# Return the function, possibly wrapping in tuple if PATH is selected.
- if alg & ns.PATH and alg & ns.UNGROUPLETTERS and alg & ns.LOCALE:
+ if alg & ns.PATH and alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA:
return lambda x: ((('',), func(x)),)
- elif alg & ns.UNGROUPLETTERS and alg & ns.LOCALE:
+ elif alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA:
return lambda x: (('',), func(x))
elif alg & ns.PATH:
return lambda x: (func(x),)
@@ -157,7 +157,7 @@ def _parse_number_function(alg, sep):
def _parse_string_function(alg, sep, splitter, pre, post, after):
"""Create a function that will properly split and format a string."""
- if not (alg & ns._DUMB and alg & ns.LOCALE):
+ if not (alg & ns._DUMB and alg & ns.LOCALEALPHA):
def func(x):
x = pre(x) # Apply pre-splitting function
original = x
@@ -216,21 +216,6 @@ def _pre_split_function(alg):
lowfirst = alg & ns.LOWERCASEFIRST
dumb = alg & ns._DUMB
- # Create a regular expression that will change the decimal point to
- # a period if not already a period.
- decimal = get_decimal_point()
-
- switch_decimal = r'(?<=[0-9]){decimal}|{decimal}(?=[0-9])'
- switch_decimal = switch_decimal.format(decimal=decimal)
- switch_decimal = re.compile(switch_decimal)
-
- # Create a regular expression that will remove thousands seprarators.
- thousands = get_thousands_sep()
- strip_thousands = (r'(?<![0-9]{{4}})(?<=[0-9]{{1}})'
- r'{thousands}(?=[0-9]{{3}}([^0-9]|$))')
- strip_thousands = strip_thousands.format(thousands=thousands)
- strip_thousands = re.compile(strip_thousands)
-
# Build the chain of functions to execute in order.
function_chain = []
if (dumb and not lowfirst) or (lowfirst and not dumb):
@@ -240,9 +225,23 @@ def _pre_split_function(alg):
function_chain.append(methodcaller('casefold'))
else:
function_chain.append(methodcaller('lower'))
- if alg & ns.LOCALE:
+
+ if alg & ns.LOCALENUM:
+ # Create a regular expression that will remove thousands separators.
+ thousands = get_thousands_sep()
+ strip_thousands = (r'(?<![0-9]{{4}})(?<=[0-9]{{1}})'
+ r'{thousands}(?=[0-9]{{3}}([^0-9]|$))')
+ strip_thousands = strip_thousands.format(thousands=thousands)
+ strip_thousands = re.compile(strip_thousands)
function_chain.append(partial(strip_thousands.sub, ''))
+
+ # Create a regular expression that will change the decimal point to
+ # a period if not already a period.
+ decimal = get_decimal_point()
if decimal != '.':
+ switch_decimal = r'(?<=[0-9]){decimal}|{decimal}(?=[0-9])'
+ switch_decimal = switch_decimal.format(decimal=decimal)
+ switch_decimal = re.compile(switch_decimal)
function_chain.append(partial(switch_decimal.sub, '.'))
# Return the chained functions.
@@ -255,7 +254,7 @@ def _post_split_function(alg):
on the post-split strings according to the user's request.
"""
# Shortcuts.
- use_locale = alg & ns.LOCALE
+ use_locale = alg & ns.LOCALEALPHA
dumb = alg & ns._DUMB
group_letters = (alg & ns.GROUPLETTERS) or (use_locale and dumb)
nan_val = float('+inf') if alg & ns.NANLAST else float('-inf')
@@ -281,7 +280,7 @@ def _post_string_parse_function(alg, sep):
Given a set of natsort algorithms, return the function to operate
on the post-parsed strings according to the user's request.
"""
- if alg & ns.UNGROUPLETTERS and alg & ns.LOCALE:
+ if alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA:
swap = alg & ns._DUMB and alg & ns.LOWERCASEFIRST
def func(split_val,
diff --git a/test_natsort/test_natsort_keygen.py b/test_natsort/test_natsort_keygen.py
index 3f48925..e94d3d3 100644
--- a/test_natsort/test_natsort_keygen.py
+++ b/test_natsort/test_natsort_keygen.py
@@ -6,6 +6,7 @@ See the README or the natsort homepage for more details.
from __future__ import unicode_literals, print_function
import warnings
+import locale
from pytest import raises
from natsort import (
natsorted,
@@ -19,6 +20,7 @@ from natsort.compat.locale import (
get_strxfrm,
)
from compat.mock import patch
+from compat.locale import load_locale
INPUT = ['6A-5.034e+1', '/Folder (1)/Foo', 56.7]
@@ -74,19 +76,23 @@ def test_natsort_keygen_splits_input_with_lowercasefirst_noexp_float():
def test_natsort_keygen_splits_input_with_locale():
+ load_locale('en_US')
strxfrm = get_strxfrm()
with patch('natsort.compat.locale.dumb_sort', return_value=False):
assert natsort_keygen(alg=ns.L)(INPUT) == ((null_string, 6, strxfrm('A-'), 5, strxfrm('.'), 34, strxfrm('e+'), 1), (strxfrm('/Folder ('), 1, strxfrm(')/Foo')), (null_string, 56.7))
with patch('natsort.compat.locale.dumb_sort', return_value=True):
assert natsort_keygen(alg=ns.L)(INPUT) == ((null_string, 6, strxfrm('aa--'), 5, strxfrm('..'), 34, strxfrm('eE++'), 1), (strxfrm('//ffoOlLdDeErR (('), 1, strxfrm('))//ffoOoO')), (null_string, 56.7))
- if PY_VERSION >= 3: assert natsort_keygen(alg=ns.L)(b'6A-5.034e+1') == (b'6A-5.034e+1',)
+ if PY_VERSION >= 3: assert natsort_keygen(alg=ns.LA)(b'6A-5.034e+1') == (b'6A-5.034e+1',)
+ locale.setlocale(locale.LC_ALL, str(''))
def test_natsort_keygen_splits_input_with_locale_and_capitalfirst():
+ load_locale('en_US')
strxfrm = get_strxfrm()
with patch('natsort.compat.locale.dumb_sort', return_value=False):
- assert natsort_keygen(alg=ns.L | ns.C)(INPUT) == ((('',), (null_string, 6, strxfrm('A-'), 5, strxfrm('.'), 34, strxfrm('e+'), 1)), (('/',), (strxfrm('/Folder ('), 1, strxfrm(')/Foo'))), (('',), (null_string, 56.7)))
- if PY_VERSION >= 3: assert natsort_keygen(alg=ns.L | ns.C)(b'6A-5.034e+1') == (b'6A-5.034e+1',)
+ assert natsort_keygen(alg=ns.LA | ns.C)(INPUT) == ((('',), (null_string, 6, strxfrm('A-'), 5, strxfrm('.'), 34, strxfrm('e+'), 1)), (('/',), (strxfrm('/Folder ('), 1, strxfrm(')/Foo'))), (('',), (null_string, 56.7)))
+ if PY_VERSION >= 3: assert natsort_keygen(alg=ns.LA | ns.C)(b'6A-5.034e+1') == (b'6A-5.034e+1',)
+ locale.setlocale(locale.LC_ALL, str(''))
def test_natsort_keygen_splits_input_with_path():
diff --git a/test_natsort/test_utils.py b/test_natsort/test_utils.py
index 406eed8..1e04573 100644
--- a/test_natsort/test_utils.py
+++ b/test_natsort/test_utils.py
@@ -139,6 +139,8 @@ def test_ns_enum_values_have_are_as_expected():
assert ns.SIGNED == ns.S
assert ns.NOEXP == ns.N
assert ns.PATH == ns.P
+ assert ns.LOCALEALPHA == ns.LA
+ assert ns.LOCALENUM == ns.LN
assert ns.LOCALE == ns.L
assert ns.IGNORECASE == ns.IC
assert ns.LOWERCASEFIRST == ns.LF
@@ -149,6 +151,7 @@ def test_ns_enum_values_have_are_as_expected():
assert ns.NANLAST == ns.NL
# Convenience
+ assert ns.LOCALE == ns.LOCALEALPHA | ns.LOCALENUM
assert ns.REAL == ns.FLOAT | ns.SIGNED
assert ns._NUMERIC_ONLY == ns.REAL | ns.NOEXP