summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Seth M Morton <seth.m.morton@gmail.com> 2018-02-10 18:29:54 -0800
committer: Seth M Morton <seth.m.morton@gmail.com> 2018-02-10 18:29:54 -0800
commit: 48265401aaac36a685d82ea2af0eff7ae71b34a8 (patch)
tree: 6b47dca9edf00d8510f5f728d94ed7e71b86ca0b
parent: 391547fbfb52b63509887547a544dba6c0119a39 (diff)
download: natsort-48265401aaac36a685d82ea2af0eff7ae71b34a8.tar.gz
Fix "always last" separator for Python 2.
The implementation used on Python 2 for the "always last" separator (needed for NUMAFTER when LOCALE is turned on for non-OSX operating systems) was buggy. The new implementation does not rely on string-value tricks; instead, it uses a sentinel object to identify the "always last" separator and explicitly places it last.
-rw-r--r-- natsort/compat/locale.py 34
1 file changed, 22 insertions, 12 deletions
diff --git a/natsort/compat/locale.py b/natsort/compat/locale.py
index a1cfa5a..cc386a2 100644
--- a/natsort/compat/locale.py
+++ b/natsort/compat/locale.py
@@ -14,7 +14,6 @@ from natsort.compat.py23 import (
PY_VERSION,
cmp_to_key,
py23_unichr,
- py23_cmp,
)
# This string should be sorted after any other byte string because
@@ -62,20 +61,31 @@ except ImportError:
import locale
if PY_VERSION < 3:
from locale import strcoll
- strxfrm = cmp_to_key(strcoll)
+ sentinel = object()
+
+ def custom_strcoll(a, b, last=sentinel):
+ """strcoll that can handle a sentinel that is always last."""
+ if a is last:
+ if b is last:
+ return 0
+ else:
+ return 1
+ elif b is last: # a cannot also be sentinel b/c above logic
+ return -1
+ else: # neither are sentinel
+ return strcoll(a, b)
+
+ strxfrm = cmp_to_key(custom_strcoll)
+ null_string_locale = strxfrm('')
+ null_string_locale_max = strxfrm(sentinel)
else:
from locale import strxfrm
+ null_string_locale = ''
- null_string_locale = ''
-
- # This string should be sorted after any other byte string because
- # it contains the max unicode character repeated 20 times.
- # You would need some odd data to come after that.
- null_string_locale_max = py23_unichr(sys.maxunicode) * 20
-
- if PY_VERSION < 3:
- null_string_locale = cmp_to_key(py23_cmp)(null_string_locale)
- null_string_locale_max = cmp_to_key(py23_cmp)(null_string_locale_max)
+ # This string should be sorted after any other byte string because
+ # it contains the max unicode character repeated 20 times.
+ # You would need some odd data to come after that.
+ null_string_locale_max = py23_unichr(sys.maxunicode) * 20
# On some systems, locale is broken and does not sort in the expected
# order. We will try to detect this and compensate.