diff options
author | Seth M Morton <seth.m.morton@gmail.com> | 2015-06-04 19:03:16 -0700 |
---|---|---|
committer | Seth M Morton <seth.m.morton@gmail.com> | 2015-06-04 19:03:16 -0700 |
commit | 16dac3fd494a4221dd3c03ab2d0b15810874cc4a (patch) | |
tree | 6cf44f16b1ffcad340abc524f4543c81cc295d71 | |
parent | f70ab699b804373d0bd37c5b751f259e85a3100a (diff) | |
parent | 7828933f968c03fda26f172345270a925077c3cf (diff) | |
download | natsort-16dac3fd494a4221dd3c03ab2d0b15810874cc4a.tar.gz |
- Added support for sorting NaN by internally converting to -Infinity
or +Infinity
-rw-r--r-- | README.rst | 13 | ||||
-rw-r--r-- | docs/source/changelog.rst | 6 | ||||
-rw-r--r-- | natsort/_version.py | 2 | ||||
-rw-r--r-- | natsort/ns_enum.py | 7 | ||||
-rw-r--r-- | natsort/utils.py | 20 | ||||
-rw-r--r-- | test_natsort/test_natsort.py | 12 | ||||
-rw-r--r-- | test_natsort/test_utils.py | 23 |
7 files changed, 73 insertions, 10 deletions
@@ -227,6 +227,12 @@ History These are the last three entries of the changelog. See the package documentation for the complete `changelog <http://pythonhosted.org//natsort/changelog.html>`_. +06-04-2015 v. 4.0.1 +''''''''''''''''''' + + - Added support for sorting NaN by internally converting to -Infinity + or +Infinity + 05-17-2015 v. 4.0.0 ''''''''''''''''''' @@ -245,10 +251,3 @@ for the complete `changelog <http://pythonhosted.org//natsort/changelog.html>`_. an ordinal sort when using 'LOCALE'. - Added convenience functions 'decoder', 'as_ascii', and 'as_utf8' for dealing with bytes types. - -04-04-2015 v. 3.5.5 -''''''''''''''''''' - - - Added 'realsorted' and 'index_realsorted' functions for - forward-compatibility with >= 4.0.0. - - Made explanation of when to use "TYPESAFE" more clear in the docs. diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index bc636a9..94b2487 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -3,6 +3,12 @@ Changelog --------- +06-04-2015 v. 4.0.1 +''''''''''''''''''' + + - Added support for sorting NaN by internally converting to -Infinity + or +Infinity + 05-17-2015 v. 4.0.0 ''''''''''''''''''' diff --git a/natsort/_version.py b/natsort/_version.py index cc26564..33e7460 100644 --- a/natsort/_version.py +++ b/natsort/_version.py @@ -2,4 +2,4 @@ from __future__ import (print_function, division, unicode_literals, absolute_import) -__version__ = '4.0.0' +__version__ = '4.0.1' diff --git a/natsort/ns_enum.py b/natsort/ns_enum.py index 8b9d794..ebe3374 100644 --- a/natsort/ns_enum.py +++ b/natsort/ns_enum.py @@ -102,6 +102,10 @@ class ns(object): behavior without `LOCALE`. UNGROUPLETTERS, UG An alias for `CAPITALFIRST`. + NANLAST, NL + If an NaN shows up in the input, this instructs `natsort` to + treat these as +Infinity and place them after all the other numbers. + By default, an NaN be treated as -Infinity and be placed first. 
TYPESAFE, T Try hard to avoid "unorderable types" error on Python 3. It is the same as setting the old `py3_safe` option to `True`. @@ -150,7 +154,8 @@ _ns = { 'GROUPLETTERS': 128, 'G': 128, 'UNGROUPLETTERS': 256, 'UG': 256, 'CAPITALFIRST': 256, 'C': 256, - 'TYPESAFE': 1024, 'T': 1024, + 'NANLAST': 512, 'NL': 512, + 'TYPESAFE': 2048, 'T': 2048, } # Populate the ns class with the _ns values. for x, y in _ns.items(): diff --git a/natsort/utils.py b/natsort/utils.py index a272f29..6d6fd99 100644 --- a/natsort/utils.py +++ b/natsort/utils.py @@ -10,6 +10,7 @@ from __future__ import (print_function, division, # Std. lib imports. import re +from math import isnan from warnings import warn from os import curdir, pardir from os.path import split, splitext @@ -248,6 +249,20 @@ def _py3_safe(parsed_list, use_locale, check): return new_list +def _fix_nan(ret, alg): + """Detect an NaN and replace or raise a ValueError.""" + t = [] + for r in ret: + if isfloat(r, num_only=True) and isnan(r): + if alg & _ns['NANLAST']: + t.append(float('+inf')) + else: + t.append(float('-inf')) + else: + t.append(r) + return tuple(t) + + def _natsort_key(val, key, alg): """\ Key to sort strings and numbers naturally. @@ -325,6 +340,9 @@ def _natsort_key(val, key, alg): alg & _ns['TYPESAFE'], use_locale, gl or (use_locale and dumb))) + # Handle NaN. + if any(isfloat(x, num_only=True) and isnan(x) for x in ret): + ret = _fix_nan(ret, alg) # For UNGROUPLETTERS, so the high level grouping can occur # based on the first letter of the string. # Do no locale transformation of the characters. @@ -359,4 +377,6 @@ def _natsort_key(val, key, alg): # Return as-is, with a leading empty string. 
except TypeError: n = null_string if use_locale else '' + if isfloat(val, num_only=True) and isnan(val): + val = _fix_nan([val], alg)[0] return ((n, val,),) if alg & _ns['PATH'] else (n, val,) diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py index 329d39f..382db24 100644 --- a/test_natsort/test_natsort.py +++ b/test_natsort/test_natsort.py @@ -146,6 +146,18 @@ def test_natsorted_with_mixed_input_returns_sorted_results_without_error(): assert natsorted(a) == [1.5, '2', 3, 'b', 'ä'] +def test_natsorted_with_nan_input_returns_sorted_results_with_nan_last_with_NANLAST(): + a = ['25', 5, float('nan'), 1E40] + # The slice is because NaN != NaN + assert natsorted(a, alg=ns.NANLAST)[:3] == [5, '25', 1E40, float('nan')][:3] + + +def test_natsorted_with_nan_input_returns_sorted_results_with_nan_first_without_NANLAST(): + a = ['25', 5, float('nan'), 1E40] + # The slice is because NaN != NaN + assert natsorted(a)[1:] == [float('nan'), 5, '25', 1E40][1:] + + def test_natsorted_with_mixed_input_raises_TypeError_if_bytes_type_is_involved_on_Python3(): if sys.version[0] == '3': with raises(TypeError) as e: diff --git a/test_natsort/test_utils.py b/test_natsort/test_utils.py index 16cf411..3668511 100644 --- a/test_natsort/test_utils.py +++ b/test_natsort/test_utils.py @@ -16,7 +16,7 @@ from natsort.ns_enum import ns from natsort.utils import _number_extracter, _py3_safe, _natsort_key, _args_to_enum from natsort.utils import _float_sign_exp_re, _float_nosign_exp_re, _float_sign_noexp_re from natsort.utils import _float_nosign_noexp_re, _int_nosign_re, _int_sign_re, _do_decoding -from natsort.utils import _path_splitter +from natsort.utils import _path_splitter, _fix_nan from natsort.locale_help import use_pyicu, null_string, locale_convert, dumb_sort from natsort.py23compat import py23_str from slow_splitters import int_splitter, float_splitter, sep_inserter @@ -125,6 +125,16 @@ int_nosafe_nolocale_group = (fast_int, False, False, True) 
int_nosafe_nolocale_nogroup = (fast_int, False, False, False) +def test_fix_nan_converts_nan_to_negative_infinity_without_NANLAST(): + assert _fix_nan((float('nan'),), 0) == (float('-inf'),) + assert _fix_nan(('a', 'b', float('nan')), 0) == ('a', 'b', float('-inf')) + + +def test_fix_nan_converts_nan_to_positive_infinity_with_NANLAST(): + assert _fix_nan((float('nan'),), ns.NANLAST) == (float('+inf'),) + assert _fix_nan(('a', 'b', float('nan')), ns.NANLAST) == ('a', 'b', float('+inf')) + + # Each test has an "example" version for demonstrative purposes, # and a test that uses the hypothesis module. @@ -380,6 +390,17 @@ def test_number_extracter_extracts_numbers_and_strxfrms_letter_doubled_strings_w locale.setlocale(locale.LC_NUMERIC, str('')) +def test__natsort_key_with_nan_input_transforms_nan_to_negative_inf(): + assert _natsort_key('nan', None, ns.FLOAT) == (u'', float('-inf')) + assert _natsort_key(float('nan'), None, 0) == (u'', float('-inf')) + + +def test__natsort_key_with_nan_input_and_NANLAST_transforms_nan_to_positive_inf(): + assert _natsort_key('nan', None, ns.FLOAT | ns.NANLAST) == (u'', float('+inf')) + assert _natsort_key(float('nan'), None, ns.NANLAST) == (u'', float('+inf')) + assert ns.NL == ns.NANLAST + + # The remaining tests provide no examples, just hypothesis tests. # They only confirm that _natsort_key uses the above building blocks. |