From 73cdf7c4613c29d95e76356c7c8660d298bce760 Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Sat, 28 Jun 2014 22:14:26 -0700 Subject: The develop branch shows Travis-CI develop badge. --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 373a3de..a48f758 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ natsort ======= -.. image:: https://travis-ci.org/SethMMorton/natsort.svg?branch=master +.. image:: https://travis-ci.org/SethMMorton/natsort.svg?branch=develop :target: https://travis-ci.org/SethMMorton/natsort Natural sorting for python. ``natsort`` requires python version 2.6 or greater -- cgit v1.2.1 From 7d2f2e03d3f6743b690ebdc560dad60ef91e03bb Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Mon, 14 Jul 2014 18:13:21 -0700 Subject: Reworked docstrings in NumpyDoc style. --- natsort/natsort.py | 351 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 219 insertions(+), 132 deletions(-) diff --git a/natsort/natsort.py b/natsort/natsort.py index e7c3e04..83aa848 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -105,82 +105,101 @@ def _py3_safe(parsed_list): @u_format def natsort_key(s, number_type=float, signed=True, exp=True, py3_safe=False): """\ + Key to sort strings and numbers naturally. + Key to sort strings and numbers naturally, not lexicographically. It is designed for use in passing to the 'sorted' builtin or 'sort' attribute of lists. - s - The value used by the sorting algorithm - - number_type (None, float, int) - The types of number to sort on: float searches for floating point - numbers, int searches for integers, and None searches for digits - (like integers but does not take into account negative sign). - None is a shortcut for number_type = int and signed = False. - - signed (True, False) - By default a '+' or '-' before a number is taken to be the sign - of the number. 
If signed is False, any '+' or '-' will not be - considered to be part of the number, but as part part of the string. - - exp (True, False) - This option only applies to number_type = float. If exp = True, - a string like "3.5e5" will be interpreted as 350000, i.e. the - exponential part is considered to be part of the number. - If exp = False, "3.5e5" is interpreted as (3.5, "e", 5). - The default behavior is exp = True. - - py3_safe (True, False) - This will make the string parsing algorithm be more careful by - placing an empty string between two adjacent numbers after the - parsing algorithm. This will prevent the "unorderable types" error. - - returns - The modified value with numbers extracted. - - Using natsort_key is just like any other sorting key in python + .. note:: Depreciation Notice (3.3.1) + This function remains in the publicly exposed API for + backwards-compatibility reasons, but future development + should use the newer `natsort_keygen` function. There + are no plans to officially remove this method from the + public API, but it leads to messier code than using + `natsort_keygen` so the latter should be preferred. + + Parameters + ---------- + val : {{str, unicode}} + The value used by the sorting algorithm + + number_type : {{None, float, int}}, optional + The types of number to sort on: `float` searches for floating + point numbers, `int` searches for integers, and `None `searches + for digits (like integers but does not take into account + negative sign). `None` is a shortcut for `number_type = int` + and `signed = False`. + + signed : {{True, False}}, optional + By default a '+' or '-' before a number is taken to be the sign + of the number. If `signed` is `False`, any '+' or '-' will not + be considered to be part of the number, but as part part of the + string. + + exp : {{True, False}}, optional + This option only applies to `number_type = float`. If + `exp = True`, a string like "3.5e5" will be interpreted as + 350000, i.e. 
the exponential part is considered to be part of + the number. If `exp = False`, "3.5e5" is interpreted as + ``(3.5, "e", 5)``. The default behavior is `exp = True`. + + py3_safe : {{True, False}}, optional + This will make the string parsing algorithm be more careful by + placing an empty string between two adjacent numbers after the + parsing algorithm. This will prevent the "unorderable types" + error. + + Returns + ------- + out : tuple + The modified value with numbers extracted. + + See Also + -------- + natsort_keygen : Generates a properly wrapped `natsort_key`. + + Examples + -------- + Using natsort_key is just like any other sorting key in python:: >>> a = ['num3', 'num5', 'num2'] >>> a.sort(key=natsort_key) >>> a [{u}'num2', {u}'num3', {u}'num5'] - It works by separating out the numbers from the strings + It works by separating out the numbers from the strings:: >>> natsort_key('num2') ({u}'num', 2.0) - If you need to call natsort_key with the number_type argument, or get a special - attribute or item of each element of the sequence, the easiest way is to make a - lambda expression that calls natsort_key:: - - >>> from operator import itemgetter - >>> a = [['num4', 'b'], ['num8', 'c'], ['num2', 'a']] - >>> f = itemgetter(0) - >>> a.sort(key=lambda x: natsort_key(f(x), number_type=int)) - >>> a - [[{u}'num2', {u}'a'], [{u}'num4', {u}'b'], [{u}'num8', {u}'c']] + If you need to call natsort_key with the number_type argument, or get a + special attribute or item of each element of the sequence, please use + the `natsort_keygen` function. Actually, please just use the + `natsort_keygen` function. - Iterables are parsed recursively so you can sort lists of lists. + Notes + ----- + Iterables are parsed recursively so you can sort lists of lists:: >>> natsort_key(('a1', 'a10')) (({u}'a', 1.0), ({u}'a', 10.0)) - Strings that lead with a number get an empty string at the front of the tuple. 
- This is designed to get around the "unorderable types" issue of Python3. + Strings that lead with a number get an empty string at the front of the + tuple. This is designed to get around the "unorderable types" issue of + Python3:: >>> natsort_key('15a') ({u}'', 15.0, {u}'a') - You can give bare numbers, too. + You can give bare numbers, too:: >>> natsort_key(10) ({u}'', 10) - If you have a case where one of your string has two numbers in a row - (only possible with "5+5" or "5-5" and signed=True to my knowledge), you - can turn on the "py3_safe" option to try to add a "" between sets of two - numbers. + If you have a case where one of your string has two numbers in a row, + you can turn on the "py3_safe" option to try to add a "" between sets + of two numbers:: >>> natsort_key('43h7+3', py3_safe=True) ({u}'', 43.0, {u}'h', 7.0, {u}'', 3.0) @@ -216,37 +235,55 @@ def natsort_key(s, number_type=float, signed=True, exp=True, py3_safe=False): @u_format def natsorted(seq, key=lambda x: x, number_type=float, signed=True, exp=True): """\ - Sorts a sequence naturally (alphabetically and numerically), - not lexicographically. - - seq (iterable) - The sequence to sort. - - key (function) - A key used to determine how to sort each element of the sequence. - - number_type (None, float, int) - The types of number to sort on: float searches for floating point - numbers, int searches for integers, and None searches for digits - (like integers but does not take into account negative sign). - None is a shortcut for number_type = int and signed = False. - - signed (True, False) - By default a '+' or '-' before a number is taken to be the sign - of the number. If signed is False, any '+' or '-' will not be - considered to be part of the number, but as part part of the string. - - exp (True, False) - This option only applies to number_type = float. If exp = True, - a string like "3.5e5" will be interpreted as 350000, i.e. 
the - exponential part is considered to be part of the number. - If exp = False, "3.5e5" is interpreted as (3.5, "e", 5). - The default behavior is exp = True. - - returns - The sorted sequence. + Sorts a sequence naturally. - Use natsorted just like the builtin sorted + Sorts a sequence naturally (alphabetically and numerically), + not lexicographically. Returns a new copy of the sorted + sequence as a list. + + Parameters + ---------- + seq : iterable + The sequence to sort. + + key : callable, optional + A key used to determine how to sort each element of the sequence. + It is **not** applied recursively. + It should accept a single argument and return a single value. + + number_type : {{None, float, int}}, optional + The types of number to sort on: `float` searches for floating + point numbers, `int` searches for integers, and `None `searches + for digits (like integers but does not take into account + negative sign). `None` is a shortcut for `number_type = int` + and `signed = False`. + + signed : {{True, False}}, optional + By default a '+' or '-' before a number is taken to be the sign + of the number. If `signed` is `False`, any '+' or '-' will not + be considered to be part of the number, but as part part of the + string. + + exp : {{True, False}}, optional + This option only applies to `number_type = float`. If + `exp = True`, a string like "3.5e5" will be interpreted as + 350000, i.e. the exponential part is considered to be part of + the number. If `exp = False`, "3.5e5" is interpreted as + ``(3.5, "e", 5)``. The default behavior is `exp = True`. + + Returns + ------- + out: list + The sorted sequence. + + See Also + -------- + versorted : A wrapper for ``natsorted(seq, number_type=None)``. + index_natsorted : Returns the sorted indexes from `natsorted`. 
+ + Examples + -------- + Use `natsorted` just like the builtin `sorted`:: >>> a = ['num3', 'num5', 'num2'] >>> natsorted(a) @@ -273,19 +310,33 @@ def natsorted(seq, key=lambda x: x, number_type=float, signed=True, exp=True): @u_format def versorted(seq, key=lambda x: x): """\ + Convenience function to sort version numbers. + Convenience function to sort version numbers. This is a wrapper - around natsorted(seq, number_type=None). + around ``natsorted(seq, number_type=None)``. - seq (iterable) - The sequence to sort. + Parameters + ---------- + seq : iterable + The sequence to sort. - key (function) - A key used to determine how to sort each element of the sequence. + key : callable, optional + A key used to determine how to sort each element of the sequence. + It is **not** applied recursively. + It should accept a single argument and return a single value. - returns - The sorted sequence. + Returns + ------- + out : list + The sorted sequence. - Use versorted just like the builtin sorted + See Also + -------- + index_versorted : Returns the sorted indexes from `versorted`. + + Examples + -------- + Use `versorted` just like the builtin `sorted`:: >>> a = ['num4.0.2', 'num3.4.1', 'num3.4.2'] >>> versorted(a) @@ -298,40 +349,58 @@ def versorted(seq, key=lambda x: x): @u_format def index_natsorted(seq, key=lambda x: x, number_type=float, signed=True, exp=True): """\ - Sorts a sequence naturally, but returns a list of sorted the - indexes and not the sorted list. - - seq (iterable) - The sequence to sort. - - key (function) - A key used to determine how to sort each element of the sequence. - - number_type (None, float, int) - The types of number to sort on: float searches for floating point - numbers, int searches for integers, and None searches for digits - (like integers but does not take into account negative sign). - None is a shortcut for number_type = int and signed = False. 
- - signed (True, False) - By default a '+' or '-' before a number is taken to be the sign - of the number. If signed is False, any '+' or '-' will not be - considered to be part of the number, but as part part of the string. + Return the list of the indexes used to sort the input sequence. - exp (True, False) - This option only applies to number_type = float. If exp = True, - a string like "3.5e5" will be interpreted as 350000, i.e. the - exponential part is considered to be part of the number. - If exp = False, "3.5e5" is interpreted as (3.5, "e", 5). - The default behavior is exp = True. - - returns - The ordered indexes of the sequence. - - Use index_natsorted if you want to sort multiple lists by the sort order of - one list: + Sorts a sequence naturally, but returns a list of sorted the + indexes and not the sorted list. This list of indexes can be + used to sort multiple lists by the sorted order of the given + sequence. + + Parameters + ---------- + seq : iterable + The sequence to sort. + + key : callable, optional + A key used to determine how to sort each element of the sequence. + It is **not** applied recursively. + It should accept a single argument and return a single value. + + number_type : {{None, float, int}}, optional + The types of number to sort on: `float` searches for floating + point numbers, `int` searches for integers, and `None `searches + for digits (like integers but does not take into account + negative sign). `None` is a shortcut for `number_type = int` + and `signed = False`. + + signed : {{True, False}}, optional + By default a '+' or '-' before a number is taken to be the sign + of the number. If `signed` is `False`, any '+' or '-' will not + be considered to be part of the number, but as part part of the + string. + + exp : {{True, False}}, optional + This option only applies to `number_type = float`. If + `exp = True`, a string like "3.5e5" will be interpreted as + 350000, i.e. 
the exponential part is considered to be part of + the number. If `exp = False`, "3.5e5" is interpreted as + ``(3.5, "e", 5)``. The default behavior is `exp = True`. + + Returns + ------- + out : tuple + The ordered indexes of the sequence. + + See Also + -------- + natsorted + + Examples + -------- + + Use index_natsorted if you want to sort multiple lists by the + sorted order of one list:: - >>> from natsort import index_natsorted >>> a = ['num3', 'num5', 'num2'] >>> b = ['foo', 'bar', 'baz'] >>> index = index_natsorted(a) @@ -368,20 +437,38 @@ def index_natsorted(seq, key=lambda x: x, number_type=float, signed=True, exp=Tr @u_format def index_versorted(seq, key=lambda x: x): """\ - Convenience function to sort version numbers but return the - indexes of how the sequence would be sorted. - This is a wrapper around index_natsorted(seq, number_type=None). - - seq (iterable) - The sequence to sort. + Return the list of the indexes used to sort the input sequence + of version numbers. - key (function) - A key used to determine how to sort each element of the sequence. - - returns - The ordered indexes of the sequence. - - Use index_versorted just like the builtin sorted + Sorts a sequence naturally, but returns a list of sorted the + indexes and not the sorted list. This list of indexes can be + used to sort multiple lists by the sorted order of the given + sequence. + + This is a wrapper around ``index_natsorted(seq, number_type=None)``. + + Parameters + ---------- + seq: iterable + The sequence to sort. + + key: callable, optional + A key used to determine how to sort each element of the sequence. + It is **not** applied recursively. + It should accept a single argument and return a single value. + + Returns + ------- + out : tuple + The ordered indexes of the sequence. 
+ + See Also + -------- + versorted + + Examples + -------- + Use `index_versorted` just like the builtin `sorted`:: >>> a = ['num4.0.2', 'num3.4.1', 'num3.4.2'] >>> index_versorted(a) -- cgit v1.2.1 From 108363e71365196e4b1414498c9ea18f08f3bb66 Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Mon, 14 Jul 2014 18:28:08 -0700 Subject: Added the natsort_keygen function. The natsort_keygen function is used to create a wrapped version of the natsort_key which can be used to supply options when calling the natsort_key. This function is intended to deprecate the public natsort_key in the future. --- natsort/__init__.py | 3 +- natsort/natsort.py | 113 +++++++++++++++++++++---- test_natsort/test_natsort.py | 183 +++++++++++++++++++++++++++++++++++++++++ test_natsort/test_natsorted.py | 159 ----------------------------------- 4 files changed, 284 insertions(+), 174 deletions(-) create mode 100644 test_natsort/test_natsort.py delete mode 100644 test_natsort/test_natsorted.py diff --git a/natsort/__init__.py b/natsort/__init__.py index 7c474e7..49693a1 100644 --- a/natsort/__init__.py +++ b/natsort/__init__.py @@ -1,11 +1,12 @@ # -*- coding: utf-8 -*- from __future__ import print_function, division, unicode_literals, absolute_import -from .natsort import natsort_key, natsorted, index_natsorted, versorted, index_versorted +from .natsort import natsort_key, natsort_keygen, natsorted, index_natsorted, versorted, index_versorted from ._version import __version__ __all__ = [ 'natsort_key', + 'natsort_keygen', 'natsorted', 'versorted' 'index_natsorted', diff --git a/natsort/natsort.py b/natsort/natsort.py index 83aa848..6f7f0e3 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -17,6 +17,7 @@ from __future__ import print_function, division, unicode_literals, absolute_impo import re import sys from operator import itemgetter +from functools import partial from numbers import Number from itertools import islice @@ -232,6 +233,96 @@ def natsort_key(s, number_type=float, 
signed=True, exp=True, py3_safe=False): return tuple(_number_finder(*args)) +@u_format +def natsort_keygen(key=None, number_type=float, signed=True, exp=True, py3_safe=False): + """\ + Generate a key to sort strings and numbers naturally. + + Generate a key to sort strings and numbers naturally, + not lexicographically. This key is designed for use as the + `key` argument to functions such as the `sorted` builtin. + + The user may customize the generated function with the + arguments to `natsort_keygen`, including an optional + `key` function which will be called before the `natsort_key`. + + Parameters + ---------- + key : callable, optional + A key used to manipulate the input value before parsing for + numbers. It is **not** applied recursively. + It should accept a single argument and return a single value. + + number_type : {{None, float, int}}, optional + The types of number to sort on: `float` searches for floating + point numbers, `int` searches for integers, and `None `searches + for digits (like integers but does not take into account + negative sign). `None` is a shortcut for `number_type = int` + and `signed = False`. + + signed : {{True, False}}, optional + By default a '+' or '-' before a number is taken to be the sign + of the number. If `signed` is `False`, any '+' or '-' will not + be considered to be part of the number, but as part part of the + string. + + exp : {{True, False}}, optional + This option only applies to `number_type = float`. If + `exp = True`, a string like "3.5e5" will be interpreted as + 350000, i.e. the exponential part is considered to be part of + the number. If `exp = False`, "3.5e5" is interpreted as + ``(3.5, "e", 5)``. The default behavior is `exp = True`. + + py3_safe : {{True, False}}, optional + This will make the string parsing algorithm be more careful by + placing an empty string between two adjacent numbers after the + parsing algorithm. This will prevent the "unorderable types" + error. 
+ + Returns + ------- + out : function + A wrapped version of the `natsort_key` function that is + suitable for passing as the `key` argument to functions + such as `sorted`. + + Examples + -------- + `natsort_keygen` is a convenient way to create a custom key + to sort lists in-place (for example). Calling with no arguments + will return a plain `natsort_key` instance:: + + >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] + >>> b = a[:] + >>> a.sort(key=natsort_key) + >>> b.sort(key=natsort_keygen()) + >>> a == b + True + + The power of `natsort_keygen` is when you want to pass + arguments to the `natsort_key`. Consider the following + equivalent examples; which is more clear? :: + + >>> a = [[1, 'num5.10'], [2, 'num-3'], [3, 'num5.3'], [4, 'num2']] + >>> b = a[:] + >>> a.sort(key=lambda x: natsort_key(itemgetter(1)(x), signed=False)) + >>> b.sort(key=natsort_keygen(key=itemgetter(1), signed=False)) + >>> a == b + True + + """ + # If no key, simply wrap the function + if key is None: + return partial(natsort_key, number_type=number_type, + signed=signed, + exp=exp, + py3_safe=py3_safe) + # If a key is given, wrap the function and make sure + # the key is called before the natsort_key. + else: + return lambda val: natsort_key(key(val), number_type, signed, exp, py3_safe) + + @u_format def natsorted(seq, key=lambda x: x, number_type=float, signed=True, exp=True): """\ @@ -291,17 +382,14 @@ def natsorted(seq, key=lambda x: x, number_type=float, signed=True, exp=True): """ try: - return sorted(seq, key=lambda x: natsort_key(key(x), - number_type=number_type, - signed=signed, exp=exp)) + return sorted(seq, key=natsort_keygen(key, number_type, + signed, exp)) except TypeError as e: # In the event of an unresolved "unorderable types" error # attempt to sort again, being careful to prevent this error. 
if 'unorderable types' in str(e): - return sorted(seq, key=lambda x: natsort_key(key(x), - number_type=number_type, - signed=signed, exp=exp, - py3_safe=True)) + return sorted(seq, key=natsort_keygen(key, number_type, + signed, exp, True)) else: # Re-raise if the problem was not "unorderable types" raise @@ -417,17 +505,14 @@ def index_natsorted(seq, key=lambda x: x, number_type=float, signed=True, exp=Tr # Pair the index and sequence together, then sort by element index_seq_pair = [[x, key(y)] for x, y in py23_zip(py23_range(len(seq)), seq)] try: - index_seq_pair.sort(key=lambda x: natsort_key(item1(x), - number_type=number_type, - signed=signed, exp=exp)) + index_seq_pair.sort(key=natsort_keygen(item1, number_type, + signed, exp)) except TypeError as e: # In the event of an unresolved "unorderable types" error # attempt to sort again, being careful to prevent this error. if 'unorderable types' in str(e): - index_seq_pair.sort(key=lambda x: natsort_key(item1(x), - number_type=number_type, - signed=signed, exp=exp, - py3_safe=True)) + index_seq_pair.sort(key=natsort_keygen(item1, number_type, + signed, exp, True)) else: # Re-raise if the problem was not "unorderable types" raise diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py new file mode 100644 index 0000000..e7bc921 --- /dev/null +++ b/test_natsort/test_natsort.py @@ -0,0 +1,183 @@ +# -*- coding: utf-8 -*- +"""\ +Here are a collection of examples of how this module can be used. +See the README or the natsort homepage for more details. 
+""" +from operator import itemgetter +from pytest import raises +from natsort import natsorted, index_natsorted, natsort_key, versorted, index_versorted, natsort_keygen +from natsort.natsort import _remove_empty, _number_finder, _py3_safe +from natsort.natsort import float_sign_exp_re, float_nosign_exp_re, float_sign_noexp_re +from natsort.natsort import float_nosign_noexp_re, int_nosign_re, int_sign_re + + +def test__remove_empty(): + + assert _remove_empty(['a', 2, '', 'b', '']) == ['a', 2, 'b'] + assert _remove_empty(['a', 2, 'b', '']) == ['a', 2, 'b'] + assert _remove_empty(['a', 2, 'b']) == ['a', 2, 'b'] + + +def test_number_finder(): + + assert _number_finder('a5+5.034e-1', float_sign_exp_re, float, False) == ['a', 5.0, 0.5034] + assert _number_finder('a5+5.034e-1', float_nosign_exp_re, float, False) == ['a', 5.0, '+', 0.5034] + assert _number_finder('a5+5.034e-1', float_sign_noexp_re, float, False) == ['a', 5.0, 5.034, 'e', -1.0] + assert _number_finder('a5+5.034e-1', float_nosign_noexp_re, float, False) == ['a', 5.0, '+', 5.034, 'e-', 1.0] + assert _number_finder('a5+5.034e-1', int_nosign_re, int, False) == ['a', 5, '+', 5, '.', 34, 'e-', 1] + assert _number_finder('a5+5.034e-1', int_sign_re, int, False) == ['a', 5, 5, '.', 34, 'e', -1] + + assert _number_finder('a5+5.034e-1', float_sign_exp_re, float, True) == ['a', 5.0, '', 0.5034] + assert _number_finder('a5+5.034e-1', float_nosign_exp_re, float, True) == ['a', 5.0, '+', 0.5034] + assert _number_finder('a5+5.034e-1', float_sign_noexp_re, float, True) == ['a', 5.0, '', 5.034, 'e', -1.0] + assert _number_finder('a5+5.034e-1', float_nosign_noexp_re, float, True) == ['a', 5.0, '+', 5.034, 'e-', 1.0] + assert _number_finder('a5+5.034e-1', int_nosign_re, int, True) == ['a', 5, '+', 5, '.', 34, 'e-', 1] + assert _number_finder('a5+5.034e-1', int_sign_re, int, True) == ['a', 5, '', 5, '.', 34, 'e', -1] + + assert _number_finder('6a5+5.034e-1', float_sign_exp_re, float, False) == ['', 6.0, 'a', 5.0, 0.5034] + 
assert _number_finder('6a5+5.034e-1', float_sign_exp_re, float, True) == ['', 6.0, 'a', 5.0, '', 0.5034] + + +def test_py3_safe(): + + assert _py3_safe(['a', 'b', 'c']) == ['a', 'b', 'c'] + assert _py3_safe(['a']) == ['a'] + assert _py3_safe(['a', 5]) == ['a', 5] + assert _py3_safe([5, 9]) == [5, '', 9] + + +def test_natsort_key(): + + a = ['num3', 'num5', 'num2'] + a.sort(key=natsort_key) + assert a == ['num2', 'num3', 'num5'] + + # The below illustrates how the key works, and how the different options affect sorting. + assert natsort_key('a-5.034e1') == ('a', -50.34) + assert natsort_key('a-5.034e1', number_type=float, signed=True, exp=True) == ('a', -50.34) + assert natsort_key('a-5.034e1', number_type=float, signed=True, exp=False) == ('a', -5.034, 'e', 1.0) + assert natsort_key('a-5.034e1', number_type=float, signed=False, exp=True) == ('a-', 50.34) + assert natsort_key('a-5.034e1', number_type=float, signed=False, exp=False) == ('a-', 5.034, 'e', 1.0) + assert natsort_key('a-5.034e1', number_type=int) == ('a', -5, '.', 34, 'e', 1) + assert natsort_key('a-5.034e1', number_type=int, signed=False) == ('a-', 5, '.', 34, 'e', 1) + assert natsort_key('a-5.034e1', number_type=None) == natsort_key('a-5.034e1', number_type=int, signed=False) + + # Iterables are parsed recursively so you can sort lists of lists. + assert natsort_key(('a1', 'a10')) == (('a', 1.0), ('a', 10.0)) + + # Strings that lead with a number get an empty string at the front of the tuple. + # This is designed to get around the "unorderable types" issue. 
+ assert natsort_key(('15a', '6')) == (('', 15.0, 'a'), ('', 6.0)) + assert natsort_key(10) == ('', 10) + + # Turn on py3_safe to put a '' between adjacent numbers + assert natsort_key('43h7+3', py3_safe=True) == ('', 43.0, 'h', 7.0, '', 3.0) + + # Invalid arguments give the correct response + with raises(ValueError) as err: + natsort_key('a', number_type='float') + assert str(err.value) == "natsort_key: 'number_type' parameter 'float' invalid" + with raises(ValueError) as err: + natsort_key('a', signed='True') + assert str(err.value) == "natsort_key: 'signed' parameter 'True' invalid" + with raises(ValueError) as err: + natsort_key('a', exp='False') + assert str(err.value) == "natsort_key: 'exp' parameter 'False' invalid" + + +def test_natsort_keygen(): + + # Creates equivalent natsort keys + a = 'a-5.034e1' + assert natsort_keygen()(a) == natsort_key(a) + assert natsort_keygen(signed=False)(a) == natsort_key(a, signed=False) + assert natsort_keygen(exp=False)(a) == natsort_key(a, exp=False) + assert natsort_keygen(signed=False, exp=False)(a) == natsort_key(a, signed=False, exp=False) + assert natsort_keygen(number_type=int)(a) == natsort_key(a, number_type=int) + assert natsort_keygen(number_type=int, signed=False)(a) == natsort_key(a, number_type=int, signed=False) + assert natsort_keygen(number_type=None)(a) == natsort_key(a, number_type=None) + + # Custom keys are more straightforward with keygen + f1 = natsort_keygen(key=lambda x: x.upper()) + f2 = lambda x: natsort_key(x.upper()) + assert f1(a) == f2(a) + + # It also makes sorting lists in-place easier (no lambdas!) 
+ a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] + b = a[:] + a.sort(key=natsort_keygen(number_type=int)) + assert a == natsorted(b, number_type=int) + + +def test_natsorted(): + + # Basic usage + a = ['a2', 'a5', 'a9', 'a1', 'a4', 'a10', 'a6'] + assert natsorted(a) == ['a1', 'a2', 'a4', 'a5', 'a6', 'a9', 'a10'] + + # Number types + a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] + assert natsorted(a) == ['a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.'] + assert natsorted(a, number_type=float, exp=False) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.'] + assert natsorted(a, number_type=int) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] + assert natsorted(a, number_type=None) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] + + # Signed option + a = ['a-5', 'a7', 'a+2'] + assert natsorted(a) == ['a-5', 'a+2', 'a7'] + assert natsorted(a, signed=False) == ['a7', 'a+2', 'a-5'] + + # Number type == None + a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] + assert natsorted(a) == ['1.10.1', '1.11', '1.11.4', '1.9.9a', '1.9.9b'] + assert natsorted(a, number_type=None) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4'] + + # You can mix types with natsorted. This can get around the new + # 'unorderable types' issue with Python 3. + a = [6, 4.5, '7', '2.5', 'a'] + assert natsorted(a) == ['2.5', 4.5, 6, '7', 'a'] + a = [46, '5a5b2', 'af5', '5a5-4'] + assert natsorted(a) == ['5a5-4', '5a5b2', 46, 'af5'] + + # You still can't sort non-iterables + with raises(TypeError) as err: + natsorted(100) + assert str(err.value) == "'int' object is not iterable" + + # natsort will recursively descend into lists of lists so you can sort by the sublist contents. 
+ data = [['a1', 'a5'], ['a1', 'a40'], ['a10', 'a1'], ['a2', 'a5']] + assert natsorted(data) == [['a1', 'a5'], ['a1', 'a40'], ['a2', 'a5'], ['a10', 'a1']] + + # You can pass a key to do non-standard sorting rules + b = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')] + assert natsorted(b, key=itemgetter(1)) == [('c', 'num2'), ('a', 'num3'), ('b', 'num5')] + + +def test_versorted(): + + a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] + assert versorted(a) == natsorted(a, number_type=None) + +def test_index_natsorted(): + + # Return the indexes of how the iterable would be sorted. + a = ['num3', 'num5', 'num2'] + b = ['foo', 'bar', 'baz'] + index = index_natsorted(a) + assert index == [2, 0, 1] + assert [a[i] for i in index] == ['num2', 'num3', 'num5'] + assert [b[i] for i in index] == ['baz', 'foo', 'bar'] + + # It accepts a key argument. + c = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')] + assert index_natsorted(c, key=itemgetter(1)) == [2, 0, 1] + + # It can avoid "unorderable types" on Python 3 + a = [46, '5a5b2', 'af5', '5a5-4'] + assert index_natsorted(a) == [3, 1, 0, 2] + + +def test_index_versorted(): + + a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] + assert index_versorted(a) == index_natsorted(a, number_type=None) diff --git a/test_natsort/test_natsorted.py b/test_natsort/test_natsorted.py deleted file mode 100644 index bfb071d..0000000 --- a/test_natsort/test_natsorted.py +++ /dev/null @@ -1,159 +0,0 @@ -# -*- coding: utf-8 -*- -"""\ -Here are a collection of examples of how this module can be used. -See the README or the natsort homepage for more details. 
-""" -from operator import itemgetter -from pytest import raises -from natsort import natsorted, index_natsorted, natsort_key, versorted, index_versorted -from natsort.natsort import _remove_empty, _number_finder, _py3_safe -from natsort.natsort import float_sign_exp_re, float_nosign_exp_re, float_sign_noexp_re -from natsort.natsort import float_nosign_noexp_re, int_nosign_re, int_sign_re - - -def test__remove_empty(): - - assert _remove_empty(['a', 2, '', 'b', '']) == ['a', 2, 'b'] - assert _remove_empty(['a', 2, 'b', '']) == ['a', 2, 'b'] - assert _remove_empty(['a', 2, 'b']) == ['a', 2, 'b'] - - -def test_number_finder(): - - assert _number_finder('a5+5.034e-1', float_sign_exp_re, float, False) == ['a', 5.0, 0.5034] - assert _number_finder('a5+5.034e-1', float_nosign_exp_re, float, False) == ['a', 5.0, '+', 0.5034] - assert _number_finder('a5+5.034e-1', float_sign_noexp_re, float, False) == ['a', 5.0, 5.034, 'e', -1.0] - assert _number_finder('a5+5.034e-1', float_nosign_noexp_re, float, False) == ['a', 5.0, '+', 5.034, 'e-', 1.0] - assert _number_finder('a5+5.034e-1', int_nosign_re, int, False) == ['a', 5, '+', 5, '.', 34, 'e-', 1] - assert _number_finder('a5+5.034e-1', int_sign_re, int, False) == ['a', 5, 5, '.', 34, 'e', -1] - - assert _number_finder('a5+5.034e-1', float_sign_exp_re, float, True) == ['a', 5.0, '', 0.5034] - assert _number_finder('a5+5.034e-1', float_nosign_exp_re, float, True) == ['a', 5.0, '+', 0.5034] - assert _number_finder('a5+5.034e-1', float_sign_noexp_re, float, True) == ['a', 5.0, '', 5.034, 'e', -1.0] - assert _number_finder('a5+5.034e-1', float_nosign_noexp_re, float, True) == ['a', 5.0, '+', 5.034, 'e-', 1.0] - assert _number_finder('a5+5.034e-1', int_nosign_re, int, True) == ['a', 5, '+', 5, '.', 34, 'e-', 1] - assert _number_finder('a5+5.034e-1', int_sign_re, int, True) == ['a', 5, '', 5, '.', 34, 'e', -1] - - assert _number_finder('6a5+5.034e-1', float_sign_exp_re, float, False) == ['', 6.0, 'a', 5.0, 0.5034] - assert 
_number_finder('6a5+5.034e-1', float_sign_exp_re, float, True) == ['', 6.0, 'a', 5.0, '', 0.5034] - - -def test_py3_safe(): - - assert _py3_safe(['a', 'b', 'c']) == ['a', 'b', 'c'] - assert _py3_safe(['a']) == ['a'] - assert _py3_safe(['a', 5]) == ['a', 5] - assert _py3_safe([5, 9]) == [5, '', 9] - - -def test_natsort_key(): - - a = ['num3', 'num5', 'num2'] - a.sort(key=natsort_key) - assert a == ['num2', 'num3', 'num5'] - - # The below illustrates how the key works, and how the different options affect sorting. - assert natsort_key('a-5.034e1') == ('a', -50.34) - assert natsort_key('a-5.034e1', number_type=float, signed=True, exp=True) == ('a', -50.34) - assert natsort_key('a-5.034e1', number_type=float, signed=True, exp=False) == ('a', -5.034, 'e', 1.0) - assert natsort_key('a-5.034e1', number_type=float, signed=False, exp=True) == ('a-', 50.34) - assert natsort_key('a-5.034e1', number_type=float, signed=False, exp=False) == ('a-', 5.034, 'e', 1.0) - assert natsort_key('a-5.034e1', number_type=int) == ('a', -5, '.', 34, 'e', 1) - assert natsort_key('a-5.034e1', number_type=int, signed=False) == ('a-', 5, '.', 34, 'e', 1) - assert natsort_key('a-5.034e1', number_type=None) == natsort_key('a-5.034e1', number_type=int, signed=False) - - # Iterables are parsed recursively so you can sort lists of lists. - assert natsort_key(('a1', 'a10')) == (('a', 1.0), ('a', 10.0)) - - # Strings that lead with a number get an empty string at the front of the tuple. - # This is designed to get around the "unorderable types" issue. 
- assert natsort_key(('15a', '6')) == (('', 15.0, 'a'), ('', 6.0)) - assert natsort_key(10) == ('', 10) - - # Turn on py3_safe to put a '' between adjacent numbers - assert natsort_key('43h7+3', py3_safe=True) == ('', 43.0, 'h', 7.0, '', 3.0) - - # Invalid arguments give the correct response - with raises(ValueError) as err: - natsort_key('a', number_type='float') - assert str(err.value) == "natsort_key: 'number_type' parameter 'float' invalid" - with raises(ValueError) as err: - natsort_key('a', signed='True') - assert str(err.value) == "natsort_key: 'signed' parameter 'True' invalid" - with raises(ValueError) as err: - natsort_key('a', exp='False') - assert str(err.value) == "natsort_key: 'exp' parameter 'False' invalid" - - -def test_natsorted(): - - # Basic usage - a = ['a2', 'a5', 'a9', 'a1', 'a4', 'a10', 'a6'] - assert natsorted(a) == ['a1', 'a2', 'a4', 'a5', 'a6', 'a9', 'a10'] - - # Number types - a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] - assert natsorted(a) == ['a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.'] - assert natsorted(a, number_type=float, exp=False) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.'] - assert natsorted(a, number_type=int) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] - assert natsorted(a, number_type=None) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] - - # Signed option - a = ['a-5', 'a7', 'a+2'] - assert natsorted(a) == ['a-5', 'a+2', 'a7'] - assert natsorted(a, signed=False) == ['a7', 'a+2', 'a-5'] - - # Number type == None - a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] - assert natsorted(a) == ['1.10.1', '1.11', '1.11.4', '1.9.9a', '1.9.9b'] - assert natsorted(a, number_type=None) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4'] - - # You can mix types with natsorted. This can get around the new - # 'unorderable types' issue with Python 3. 
- a = [6, 4.5, '7', '2.5', 'a'] - assert natsorted(a) == ['2.5', 4.5, 6, '7', 'a'] - a = [46, '5a5b2', 'af5', '5a5-4'] - assert natsorted(a) == ['5a5-4', '5a5b2', 46, 'af5'] - - # You still can't sort non-iterables - with raises(TypeError) as err: - natsorted(100) - assert str(err.value) == "'int' object is not iterable" - - # natsort will recursively descend into lists of lists so you can sort by the sublist contents. - data = [['a1', 'a5'], ['a1', 'a40'], ['a10', 'a1'], ['a2', 'a5']] - assert natsorted(data) == [['a1', 'a5'], ['a1', 'a40'], ['a2', 'a5'], ['a10', 'a1']] - - # You can pass a key to do non-standard sorting rules - b = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')] - assert natsorted(b, key=itemgetter(1)) == [('c', 'num2'), ('a', 'num3'), ('b', 'num5')] - - -def test_versorted(): - - a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] - assert versorted(a) == natsorted(a, number_type=None) - -def test_index_natsorted(): - - # Return the indexes of how the iterable would be sorted. - a = ['num3', 'num5', 'num2'] - b = ['foo', 'bar', 'baz'] - index = index_natsorted(a) - assert index == [2, 0, 1] - assert [a[i] for i in index] == ['num2', 'num3', 'num5'] - assert [b[i] for i in index] == ['baz', 'foo', 'bar'] - - # It accepts a key argument. - c = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')] - assert index_natsorted(c, key=itemgetter(1)) == [2, 0, 1] - - # It can avoid "unorderable types" on Python 3 - a = [46, '5a5b2', 'af5', '5a5-4'] - assert index_natsorted(a) == [3, 1, 0, 2] - - -def test_index_versorted(): - - a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] - assert index_versorted(a) == index_natsorted(a, number_type=None) -- cgit v1.2.1 From 7bfdd4c158cf5a2bcce4226d531d02f5adbb7b40 Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Mon, 14 Jul 2014 18:58:46 -0700 Subject: Performance optimizations and fixed recursive bug. By profiling it was found that many of the initial design decisions for natsort had poor performance implications.
For small input, these do not make a difference, but for large inputs the penalty can be large. These include small micro-optimizations in natsort_key (such as choosing explicit arguments over argument unpacking, which has noticeable overhead when sorting ~100000 items), and python-specific refactoring optimizations (i.e. isinstance vs. exception catching vs. regex checking). Additionally, the default value for the key has been switched from 'lambda x: x' to 'None', which is both a performance optimization and a design improvement. Next, natsort_key had the 'key' option added because it was found that handling this in natsort_key is much faster than in natsort_keygen. Last, the bug that made the natsort_key options not get passed to recursive natsort_key calls has been fixed. --- natsort/natsort.py | 135 +++++++++++++++++++++----------------- test_natsort/profile_natsorted.py | 106 ++++++++++++++++++++++++++++++ test_natsort/test_natsort.py | 11 +--- 3 files changed, 183 insertions(+), 69 deletions(-) create mode 100644 test_natsort/profile_natsorted.py diff --git a/natsort/natsort.py b/natsort/natsort.py index 6f7f0e3..47308f4 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -18,7 +18,6 @@ import re import sys from operator import itemgetter from functools import partial -from numbers import Number from itertools import islice from .py23compat import u_format, py23_basestring, py23_range, py23_str, py23_zip @@ -50,42 +49,39 @@ regex_and_num_function_chooser = { } -def _remove_empty(s): - """Remove empty strings from a list.""" - while True: - try: - s.remove('') - except ValueError: - break - return s - - def _number_finder(s, regex, numconv, py3_safe): """Helper to split numbers""" - # Split. If there are no splits, return now + # Split the input string by numbers. + # If there are no splits, return now. + # If the input is not a string, TypeError is raised.
s = regex.split(s) if len(s) == 1: return tuple(s) - # Now convert the numbers to numbers, and leave strings as strings - s = _remove_empty(s) - for i in py23_range(len(s)): - try: - s[i] = numconv(s[i]) - except ValueError: - pass + # Now convert the numbers to numbers, and leave strings as strings. + # Remove empty strings from the list. + # Profiling showed that using regex here is much faster than + # try/except with the numconv function. + r = regex.match + s = [numconv(x) if r(x) else x for x in s if x] # If the list begins with a number, lead with an empty string. # This is used to get around the "unorderable types" issue. + # The most common case will be a string at the front of the + # list, and in that case the try/except method is faster than + # using isinstance. This was chosen at the expense of the less + # common case of a number being at the front of the list. + try: + s[0][0] # str supports indexing, but not numbers + except TypeError: + s = [''] + s + # The _py3_safe function inserts "" between numbers in the list, # and is used to get around "unorderable types" in complex cases. # It is a separate function that needs to be requested specifically # because it is expensive to call. - if not isinstance(s[0], py23_basestring): - return _py3_safe([''] + s) if py3_safe else [''] + s - else: - return _py3_safe(s) if py3_safe else s + return _py3_safe(s) if py3_safe else s def _py3_safe(parsed_list): @@ -95,16 +91,19 @@ def _py3_safe(parsed_list): else: new_list = [parsed_list[0]] nl_append = new_list.append + ntypes = {float, int} for before, after in py23_zip(islice(parsed_list, 0, len(parsed_list)-1), islice(parsed_list, 1, None)): - if isinstance(before, Number) and isinstance(after, Number): + # I realize that isinstance is favored over type, but + # in this case type is SO MUCH FASTER than isinstance!! 
+ if type(before) in ntypes and type(after) in ntypes: nl_append("") nl_append(after) return new_list @u_format -def natsort_key(s, number_type=float, signed=True, exp=True, py3_safe=False): +def natsort_key(val, key=None, number_type=float, signed=True, exp=True, py3_safe=False): """\ Key to sort strings and numbers naturally. @@ -125,6 +124,11 @@ def natsort_key(s, number_type=float, signed=True, exp=True, py3_safe=False): val : {{str, unicode}} The value used by the sorting algorithm + key : callable, optional + A key used to manipulate the input value before parsing for + numbers. It is **not** applied recursively. + It should accept a single argument and return a single value. + number_type : {{None, float, int}}, optional The types of number to sort on: `float` searches for floating point numbers, `int` searches for integers, and `None `searches @@ -206,18 +210,11 @@ def natsort_key(s, number_type=float, signed=True, exp=True, py3_safe=False): ({u}'', 43.0, {u}'h', 7.0, {u}'', 3.0) """ - - # If we are dealing with non-strings, return now - if not isinstance(s, py23_basestring): - if hasattr(s, '__getitem__'): - return tuple(natsort_key(x) for x in s) - else: - return ('', s,) - - # Convert to the proper tuple and return + + # Convert the arguments to the proper input tuple inp_options = (number_type, signed, exp) try: - args = (s,) + regex_and_num_function_chooser[inp_options] + (py3_safe,) + regex, num_function = regex_and_num_function_chooser[inp_options] except KeyError: # Report errors properly if number_type not in (float, int) and number_type is not None: @@ -230,7 +227,27 @@ def natsort_key(s, number_type=float, signed=True, exp=True, py3_safe=False): raise ValueError("natsort_key: 'exp' " "parameter '{0}' invalid".format(py23_str(exp))) else: - return tuple(_number_finder(*args)) + # Apply key if needed. + if key is not None: + val = key(val) + # Assume the input are strings, which is the most common case. 
+ try: + return tuple(_number_finder(val, regex, num_function, py3_safe)) + except TypeError: + # If not strings, assume it is an iterable that must + # be parsed recursively. Do not apply the key recursively. + try: + return tuple([natsort_key(x, None, number_type, signed, + exp, py3_safe) for x in val]) + # If there is still an error, it must be a number. + # Return as-is, with a leading empty string. + # Waiting for two raised errors instead of calling + # isinstance at the opening of the function is slower + # for numbers but much faster for strings, and since + # numbers are not a common input to natsort this is + # an acceptable sacrifice. + except TypeError: + return ('', val,) @u_format @@ -303,28 +320,23 @@ def natsort_keygen(key=None, number_type=float, signed=True, exp=True, py3_safe= arguments to the `natsort_key`. Consider the following equivalent examples; which is more clear? :: - >>> a = [[1, 'num5.10'], [2, 'num-3'], [3, 'num5.3'], [4, 'num2']] + >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] >>> b = a[:] - >>> a.sort(key=lambda x: natsort_key(itemgetter(1)(x), signed=False)) - >>> b.sort(key=natsort_keygen(key=itemgetter(1), signed=False)) + >>> a.sort(key=lambda x: natsort_key(x, key=lambda y: y.upper(), signed=False)) + >>> b.sort(key=natsort_keygen(key=lambda x: x.upper(), signed=False)) >>> a == b True """ - # If no key, simply wrap the function - if key is None: - return partial(natsort_key, number_type=number_type, - signed=signed, - exp=exp, - py3_safe=py3_safe) - # If a key is given, wrap the function and make sure - # the key is called before the natsort_key. 
- else: - return lambda val: natsort_key(key(val), number_type, signed, exp, py3_safe) + return partial(natsort_key, key=key, + number_type=number_type, + signed=signed, + exp=exp, + py3_safe=py3_safe) @u_format -def natsorted(seq, key=lambda x: x, number_type=float, signed=True, exp=True): +def natsorted(seq, key=None, number_type=float, signed=True, exp=True): """\ Sorts a sequence naturally. @@ -396,7 +408,7 @@ def natsorted(seq, key=lambda x: x, number_type=float, signed=True, exp=True): @u_format -def versorted(seq, key=lambda x: x): +def versorted(seq, key=None): """\ Convenience function to sort version numbers. @@ -431,11 +443,11 @@ def versorted(seq, key=lambda x: x): [{u}'num3.4.1', {u}'num3.4.2', {u}'num4.0.2'] """ - return natsorted(seq, key=key, number_type=None) + return natsorted(seq, key, None) @u_format -def index_natsorted(seq, key=lambda x: x, number_type=float, signed=True, exp=True): +def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True): """\ Return the list of the indexes used to sort the input sequence. @@ -501,26 +513,29 @@ def index_natsorted(seq, key=lambda x: x, number_type=float, signed=True, exp=Tr [{u}'baz', {u}'foo', {u}'bar'] """ - item1 = itemgetter(1) + if key is None: + newkey = itemgetter(1) + else: + newkey = lambda x : key(itemgetter(1)(x)) # Pair the index and sequence together, then sort by element - index_seq_pair = [[x, key(y)] for x, y in py23_zip(py23_range(len(seq)), seq)] + index_seq_pair = [[x, y] for x, y in enumerate(seq)] try: - index_seq_pair.sort(key=natsort_keygen(item1, number_type, + index_seq_pair.sort(key=natsort_keygen(newkey, number_type, signed, exp)) except TypeError as e: # In the event of an unresolved "unorderable types" error # attempt to sort again, being careful to prevent this error. 
if 'unorderable types' in str(e): - index_seq_pair.sort(key=natsort_keygen(item1, number_type, + index_seq_pair.sort(key=natsort_keygen(newkey, number_type, signed, exp, True)) else: # Re-raise if the problem was not "unorderable types" raise - return [x[0] for x in index_seq_pair] + return [x for x, _ in index_seq_pair] @u_format -def index_versorted(seq, key=lambda x: x): +def index_versorted(seq, key=None): """\ Return the list of the indexes used to sort the input sequence of version numbers. @@ -560,5 +575,5 @@ def index_versorted(seq, key=lambda x: x): [1, 2, 0] """ - return index_natsorted(seq, key=key, number_type=None) + return index_natsorted(seq, key, None) diff --git a/test_natsort/profile_natsorted.py b/test_natsort/profile_natsorted.py new file mode 100644 index 0000000..7978c23 --- /dev/null +++ b/test_natsort/profile_natsorted.py @@ -0,0 +1,106 @@ +from __future__ import print_function +import cProfile +import random +import sys + +from natsort import natsorted, index_natsorted + + +# Sample lists to sort +nums = random.sample(xrange(10000), 1000) +nstr = list(map(str,random.sample(xrange(10000), 1000))) +astr = ['a'+x+'num' for x in map(str,random.sample(xrange(10000), 1000))] +tstr = [['a'+x, 'a-'+x] for x in map(str,random.sample(xrange(10000), 1000))] +cstr = ['a'+x+'-'+x for x in map(str,random.sample(xrange(10000), 1000))] + +''' +def prof_nums(a): + print('*** Basic Call, Numbers ***') + for _ in xrange(1000): + natsorted(a) +cProfile.run('prof_nums(nums)', sort='time') + + +def prof_num_str(a): + print('*** Basic Call, Numbers as Strings ***') + for _ in xrange(1000): + natsorted(a) +cProfile.run('prof_num_str(nstr)', sort='time') +''' + +def prof_str(a): + print('*** Basic Call, Strings ***') + for _ in xrange(1000): + natsorted(a) +cProfile.run('prof_str(astr)', sort='time') + +''' +def prof_str_index(a): + print('*** Basic Index Call ***') + for _ in xrange(1000): + index_natsorted(a) +cProfile.run('prof_str_index(astr)', sort='time') + + 
+def prof_nested(a): + print('*** Basic Call, Nested Strings ***') + for _ in xrange(1000): + natsorted(a) +cProfile.run('prof_nested(tstr)', sort='time') + + +def prof_str_noexp(a): + print('*** No-Exp Call ***') + for _ in xrange(1000): + natsorted(a, exp=False) +cProfile.run('prof_str_noexp(astr)', sort='time') + + +def prof_str_unsigned(a): + print('*** Unsigned Call ***') + for _ in xrange(1000): + natsorted(a, signed=False) +cProfile.run('prof_str_unsigned(astr)', sort='time') + + +def prof_str_unsigned_noexp(a): + print('*** Unsigned No-Exp Call ***') + for _ in xrange(1000): + natsorted(a, signed=False, exp=False) +cProfile.run('prof_str_unsigned_noexp(astr)', sort='time') + + +def prof_str_asint(a): + print('*** Int Call ***') + for _ in xrange(1000): + natsorted(a, number_type=int) +cProfile.run('prof_str_asint(astr)', sort='time') + + +def prof_str_asint_unsigned(a): + print('*** Unsigned Int (Versions) Call ***') + for _ in xrange(1000): + natsorted(a, number_type=int, signed=False) +cProfile.run('prof_str_asint_unsigned(astr)', sort='time') +''' + +def prof_str_key(a): + print('*** Basic Call With Key ***') + for _ in xrange(1000): + natsorted(a, key=lambda x: x.upper()) +cProfile.run('prof_str_key(astr)', sort='time') +sys.exit() + +def prof_str_index_key(a): + print('*** Basic Index Call With Key ***') + for _ in xrange(1000): + index_natsorted(a, key=lambda x: x.upper()) +cProfile.run('prof_str_index_key(astr)', sort='time') + + +def prof_str_unorderable(a): + print('*** Basic Index Call, "Unorderable" ***') + for _ in xrange(1000): + natsorted(a) +cProfile.run('prof_str_unorderable(cstr)', sort='time') + diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py index e7bc921..2d8505c 100644 --- a/test_natsort/test_natsort.py +++ b/test_natsort/test_natsort.py @@ -6,18 +6,11 @@ See the README or the natsort homepage for more details. 
from operator import itemgetter from pytest import raises from natsort import natsorted, index_natsorted, natsort_key, versorted, index_versorted, natsort_keygen -from natsort.natsort import _remove_empty, _number_finder, _py3_safe +from natsort.natsort import _number_finder, _py3_safe from natsort.natsort import float_sign_exp_re, float_nosign_exp_re, float_sign_noexp_re from natsort.natsort import float_nosign_noexp_re, int_nosign_re, int_sign_re -def test__remove_empty(): - - assert _remove_empty(['a', 2, '', 'b', '']) == ['a', 2, 'b'] - assert _remove_empty(['a', 2, 'b', '']) == ['a', 2, 'b'] - assert _remove_empty(['a', 2, 'b']) == ['a', 2, 'b'] - - def test_number_finder(): assert _number_finder('a5+5.034e-1', float_sign_exp_re, float, False) == ['a', 5.0, 0.5034] @@ -99,7 +92,7 @@ def test_natsort_keygen(): # Custom keys are more straightforward with keygen f1 = natsort_keygen(key=lambda x: x.upper()) - f2 = lambda x: natsort_key(x.upper()) + f2 = lambda x: natsort_key(x, key=lambda y: y.upper()) assert f1(a) == f2(a) # It also makes sorting lists in-place easier (no lambdas!) -- cgit v1.2.1 From f538ea8fc59e8b2290cfd29da8523851b6f441f4 Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Mon, 14 Jul 2014 20:27:00 -0700 Subject: Added the order_by_index function. The order_by_index function is a convenience function that helps the user order a list by the index list that is returned by the *_index functions. Additionally, some import cleanup was done in the natsort.py file. 
--- natsort/__init__.py | 5 +++- natsort/natsort.py | 71 +++++++++++++++++++++++++++++++++++++++++--- test_natsort/test_natsort.py | 15 +++++++++- 3 files changed, 85 insertions(+), 6 deletions(-) diff --git a/natsort/__init__.py b/natsort/__init__.py index 49693a1..f75b09b 100644 --- a/natsort/__init__.py +++ b/natsort/__init__.py @@ -1,7 +1,9 @@ # -*- coding: utf-8 -*- from __future__ import print_function, division, unicode_literals, absolute_import -from .natsort import natsort_key, natsort_keygen, natsorted, index_natsorted, versorted, index_versorted +from .natsort import natsort_key, natsort_keygen, natsorted, \ + index_natsorted, versorted, index_versorted, \ + order_by_index from ._version import __version__ __all__ = [ @@ -11,5 +13,6 @@ __all__ = [ 'versorted' 'index_natsorted', 'index_versorted', + 'order_by_index', ] diff --git a/natsort/natsort.py b/natsort/natsort.py index 47308f4..a54883e 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -20,9 +20,11 @@ from operator import itemgetter from functools import partial from itertools import islice -from .py23compat import u_format, py23_basestring, py23_range, py23_str, py23_zip +from .py23compat import u_format, py23_basestring, py23_str, \ + py23_range, py23_zip -__doc__ = u_format(__doc__) # Make sure the doctest works for either python2 or python3 +__doc__ = u_format(__doc__) # Make sure the doctest works for either + # python2 or python3 # The regex that locates floats float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)') @@ -494,6 +496,7 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True): See Also -------- natsorted + order_by_index Examples -------- @@ -507,9 +510,9 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True): >>> index [2, 0, 1] >>> # Sort both lists by the sort order of a - >>> [a[i] for i in index] + >>> order_by_index(a, index) [{u}'num2', {u}'num3', {u}'num5'] - >>> [b[i] for i in index] + >>> 
order_by_index(b, index) [{u}'baz', {u}'foo', {u}'bar'] """ @@ -565,6 +568,7 @@ def index_versorted(seq, key=None): See Also -------- versorted + order_by_index Examples -------- @@ -577,3 +581,62 @@ def index_versorted(seq, key=None): """ return index_natsorted(seq, key, None) + +@u_format +def order_by_index(seq, index, iter=False): + """\ + Order a given sequence by an index sequence. + + The output of `index_natsorted` and `index_versorted` is a + sequence of integers (index) that correspond to how its input + sequence **would** be sorted. The idea is that this index can + be used to reorder multiple sequences by the sorted order of the + first sequence. This function is a convenient wrapper to + apply this ordering to a sequence. + + Parameters + ---------- + seq : iterable + The sequence to order. + + index : iterable + The sequence that indicates how to order `seq`. + It should be the same length as `seq` and consist + of integers only. + + iter : {{True, False}}, optional + If `True`, the ordered sequence is returned as a + generator expression; otherwise it is returned as a + list. The default is `False`. + + Returns + ------- + out : {{list, generator}} + The sequence ordered by `index`, as a `list` or as a + generator expression (depending on the value of `iter`). 
+ + See Also + -------- + index_natsorted + index_versorted + + Examples + -------- + + `order_by_index` is a convenience function that helps you apply + the result of `index_natsorted` or `index_versorted`:: + + >>> a = ['num3', 'num5', 'num2'] + >>> b = ['foo', 'bar', 'baz'] + >>> index = index_natsorted(a) + >>> index + [2, 0, 1] + >>> # Sort both lists by the sort order of a + >>> order_by_index(a, index) + [{u}'num2', {u}'num3', {u}'num5'] + >>> order_by_index(b, index) + [{u}'baz', {u}'foo', {u}'bar'] + + """ + return (seq[i] for i in index) if iter else [seq[i] for i in index] + diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py index 2d8505c..f3ee3b8 100644 --- a/test_natsort/test_natsort.py +++ b/test_natsort/test_natsort.py @@ -5,7 +5,7 @@ See the README or the natsort homepage for more details. """ from operator import itemgetter from pytest import raises -from natsort import natsorted, index_natsorted, natsort_key, versorted, index_versorted, natsort_keygen +from natsort import natsorted, index_natsorted, natsort_key, versorted, index_versorted, natsort_keygen, order_by_index from natsort.natsort import _number_finder, _py3_safe from natsort.natsort import float_sign_exp_re, float_nosign_exp_re, float_sign_noexp_re from natsort.natsort import float_nosign_noexp_re, int_nosign_re, int_sign_re @@ -174,3 +174,16 @@ def test_index_versorted(): a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] assert index_versorted(a) == index_natsorted(a, number_type=None) + + +def test_order_by_index(): + + # Return the indexes of how the iterable would be sorted.
+ a = ['num3', 'num5', 'num2'] + index = [2, 0, 1] + assert order_by_index(a, index) == ['num2', 'num3', 'num5'] + assert order_by_index(a, index) == [a[i] for i in index] + assert order_by_index(a, index, True) != [a[i] for i in index] + assert list(order_by_index(a, index, True)) == [a[i] for i in index] + + -- cgit v1.2.1 From fb46d77c99f09503f412fd110dedc10e9f66bca4 Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Mon, 14 Jul 2014 20:36:29 -0700 Subject: Added additional unit tests. Added tests that verify options are passed to sub-functions, and also test new functionality. --- test_natsort/test_natsort.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py index f3ee3b8..185867b 100644 --- a/test_natsort/test_natsort.py +++ b/test_natsort/test_natsort.py @@ -54,9 +54,13 @@ def test_natsort_key(): assert natsort_key('a-5.034e1', number_type=int) == ('a', -5, '.', 34, 'e', 1) assert natsort_key('a-5.034e1', number_type=int, signed=False) == ('a-', 5, '.', 34, 'e', 1) assert natsort_key('a-5.034e1', number_type=None) == natsort_key('a-5.034e1', number_type=int, signed=False) + assert natsort_key('a-5.034e1', key=lambda x: x.upper()) == ('A', -50.34) # Iterables are parsed recursively so you can sort lists of lists. - assert natsort_key(('a1', 'a10')) == (('a', 1.0), ('a', 10.0)) + assert natsort_key(('a1', 'a-5.034e1')) == (('a', 1.0), ('a', -50.34)) + assert natsort_key(('a1', 'a-5.034e1'), number_type=None) == (('a', 1), ('a-', 5, '.', 34, 'e', 1)) + # A key is applied before recursion, but not in the recursive calls. + assert natsort_key(('a1', 'a-5.034e1'), key=itemgetter(1)) == ('a', -50.34) # Strings that lead with a number get an empty string at the front of the tuple. # This is designed to get around the "unorderable types" issue. 
@@ -137,7 +141,8 @@ def test_natsorted(): natsorted(100) assert str(err.value) == "'int' object is not iterable" - # natsort will recursively descend into lists of lists so you can sort by the sublist contents. + # natsort will recursively descend into lists of lists so you can + # sort by the sublist contents. data = [['a1', 'a5'], ['a1', 'a40'], ['a10', 'a1'], ['a2', 'a5']] assert natsorted(data) == [['a1', 'a5'], ['a1', 'a40'], ['a2', 'a5'], ['a10', 'a1']] @@ -150,6 +155,9 @@ def test_versorted(): a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] assert versorted(a) == natsorted(a, number_type=None) + a = [('a', '1.9.9a'), ('a', '1.11'), ('a', '1.9.9b'), ('a', '1.11.4'), ('a', '1.10.1')] + assert versorted(a) == [('a', '1.9.9a'), ('a', '1.9.9b'), ('a', '1.10.1'), ('a', '1.11'), ('a', '1.11.4')] + def test_index_natsorted(): @@ -169,11 +177,17 @@ def test_index_natsorted(): a = [46, '5a5b2', 'af5', '5a5-4'] assert index_natsorted(a) == [3, 1, 0, 2] + # It can sort lists of lists. + data = [['a1', 'a5'], ['a1', 'a40'], ['a10', 'a1'], ['a2', 'a5']] + assert index_natsorted(data) == [0, 1, 3, 2] + def test_index_versorted(): a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] assert index_versorted(a) == index_natsorted(a, number_type=None) + a = [('a', '1.9.9a'), ('a', '1.11'), ('a', '1.9.9b'), ('a', '1.11.4'), ('a', '1.10.1')] + assert index_versorted(a) == [0, 2, 4, 1, 3] def test_order_by_index(): -- cgit v1.2.1 From dfc22f23d84062dc9abfd6d914beb3277c18cb85 Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Mon, 14 Jul 2014 20:56:18 -0700 Subject: Added a reverse option to natsorted. This option makes natsorted simulate more closely sorted. The reverse option is also added to the index_* functions and versorted. This way the python internals are doing the reversing, not you. The natsort command line program was updated with this change. 
--- natsort/__main__.py | 12 +++++------- natsort/natsort.py | 44 ++++++++++++++++++++++++++++++++------------ test_natsort/test_natsort.py | 7 +++++++ 3 files changed, 44 insertions(+), 19 deletions(-) diff --git a/natsort/__main__.py b/natsort/__main__.py index 7930d1a..ebc4300 100644 --- a/natsort/__main__.py +++ b/natsort/__main__.py @@ -5,10 +5,7 @@ import sys import os import re -from .natsort import natsort_key, natsorted, int_nosign_re, int_sign_re -from .natsort import float_sign_exp_re, float_nosign_exp_re -from .natsort import float_sign_noexp_re, float_nosign_noexp_re -from .natsort import regex_and_num_function_chooser +from .natsort import natsorted, regex_and_num_function_chooser from ._version import __version__ from .py23compat import py23_str @@ -133,7 +130,8 @@ def sort_and_print_entries(entries, args): 'int': int, 'float': float}[args.number_type], 'signed': args.signed, - 'exp': args.exp} + 'exp': args.exp, + 'reverse': args.reverse,} # Pre-remove entries that don't pass the filtering criteria # Make sure we use the same searching algorithm for filtering as for sorting. @@ -154,8 +152,7 @@ def sort_and_print_entries(entries, args): if exclude_entry(entry, exclude, num_function, regex)] # Print off the sorted results - entries.sort(key=lambda x: natsort_key(x, **kwargs), reverse=args.reverse) - for entry in entries: + for entry in natsorted(entries, **kwargs): print(entry) @@ -166,3 +163,4 @@ if __name__ == '__main__': sys.exit(py23_str(a)) except KeyboardInterrupt: sys.exit(1) + diff --git a/natsort/natsort.py b/natsort/natsort.py index a54883e..9177396 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -338,7 +338,7 @@ def natsort_keygen(key=None, number_type=float, signed=True, exp=True, py3_safe= @u_format -def natsorted(seq, key=None, number_type=float, signed=True, exp=True): +def natsorted(seq, key=None, number_type=float, signed=True, exp=True, reverse=False): """\ Sorts a sequence naturally. 
@@ -376,6 +376,10 @@ def natsorted(seq, key=None, number_type=float, signed=True, exp=True): the number. If `exp = False`, "3.5e5" is interpreted as ``(3.5, "e", 5)``. The default behavior is `exp = True`. + reverse : {{True, False}}, optional + Return the list in reversed sorted order. The default is + `False`. + Returns ------- out: list @@ -396,21 +400,23 @@ def natsorted(seq, key=None, number_type=float, signed=True, exp=True): """ try: - return sorted(seq, key=natsort_keygen(key, number_type, - signed, exp)) + return sorted(seq, reverse=reverse, + key=natsort_keygen(key, number_type, + signed, exp)) except TypeError as e: # In the event of an unresolved "unorderable types" error # attempt to sort again, being careful to prevent this error. if 'unorderable types' in str(e): - return sorted(seq, key=natsort_keygen(key, number_type, - signed, exp, True)) + return sorted(seq, reverse=reverse, + key=natsort_keygen(key, number_type, + signed, exp, True)) else: # Re-raise if the problem was not "unorderable types" raise @u_format -def versorted(seq, key=None): +def versorted(seq, key=None, reverse=False): """\ Convenience function to sort version numbers. @@ -427,6 +433,10 @@ def versorted(seq, key=None): It is **not** applied recursively. It should accept a single argument and return a single value. + reverse : {{True, False}}, optional + Return the list in reversed sorted order. The default is + `False`. + Returns ------- out : list @@ -445,11 +455,11 @@ def versorted(seq, key=None): [{u}'num3.4.1', {u}'num3.4.2', {u}'num4.0.2'] """ - return natsorted(seq, key, None) + return natsorted(seq, key, None, reverse=reverse) @u_format -def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True): +def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True, reverse=False): """\ Return the list of the indexes used to sort the input sequence. 
@@ -488,6 +498,10 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True): the number. If `exp = False`, "3.5e5" is interpreted as ``(3.5, "e", 5)``. The default behavior is `exp = True`. + reverse : {{True, False}}, optional + Return the list in reversed sorted order. The default is + `False`. + Returns ------- out : tuple @@ -523,13 +537,15 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True): # Pair the index and sequence together, then sort by element index_seq_pair = [[x, y] for x, y in enumerate(seq)] try: - index_seq_pair.sort(key=natsort_keygen(newkey, number_type, + index_seq_pair.sort(reverse=reverse, + key=natsort_keygen(newkey, number_type, signed, exp)) except TypeError as e: # In the event of an unresolved "unorderable types" error # attempt to sort again, being careful to prevent this error. if 'unorderable types' in str(e): - index_seq_pair.sort(key=natsort_keygen(newkey, number_type, + index_seq_pair.sort(reverse=reverse, + key=natsort_keygen(newkey, number_type, signed, exp, True)) else: # Re-raise if the problem was not "unorderable types" @@ -538,7 +554,7 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True): @u_format -def index_versorted(seq, key=None): +def index_versorted(seq, key=None, reverse=False): """\ Return the list of the indexes used to sort the input sequence of version numbers. @@ -560,6 +576,10 @@ def index_versorted(seq, key=None): It is **not** applied recursively. It should accept a single argument and return a single value. + reverse : {{True, False}}, optional + Return the list in reversed sorted order. The default is + `False`. 
+ Returns ------- out : tuple @@ -579,7 +599,7 @@ def index_versorted(seq, key=None): [1, 2, 0] """ - return index_natsorted(seq, key, None) + return index_natsorted(seq, key, None, reverse=reverse) @u_format diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py index 185867b..31c0bb9 100644 --- a/test_natsort/test_natsort.py +++ b/test_natsort/test_natsort.py @@ -150,11 +150,16 @@ def test_natsorted(): b = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')] assert natsorted(b, key=itemgetter(1)) == [('c', 'num2'), ('a', 'num3'), ('b', 'num5')] + # Reversing the order is allowed + a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] + assert natsorted(a, reverse=True) == ['a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.'][::-1] + def test_versorted(): a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] assert versorted(a) == natsorted(a, number_type=None) + assert versorted(a, reverse=True) == versorted(a)[::-1] a = [('a', '1.9.9a'), ('a', '1.11'), ('a', '1.9.9b'), ('a', '1.11.4'), ('a', '1.10.1')] assert versorted(a) == [('a', '1.9.9a'), ('a', '1.9.9b'), ('a', '1.10.1'), ('a', '1.11'), ('a', '1.11.4')] @@ -168,6 +173,7 @@ def test_index_natsorted(): assert index == [2, 0, 1] assert [a[i] for i in index] == ['num2', 'num3', 'num5'] assert [b[i] for i in index] == ['baz', 'foo', 'bar'] + assert index_natsorted(a, reverse=True) == [1, 0, 2] # It accepts a key argument. 
c = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')] @@ -186,6 +192,7 @@ def test_index_versorted(): a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] assert index_versorted(a) == index_natsorted(a, number_type=None) + assert index_versorted(a, reverse=True) == index_versorted(a)[::-1] a = [('a', '1.9.9a'), ('a', '1.11'), ('a', '1.9.9b'), ('a', '1.11.4'), ('a', '1.10.1')] assert index_versorted(a) == [0, 2, 4, 1, 3] -- cgit v1.2.1 From a53f15a7593f8a3147b8e9f213eae4074c575af2 Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Mon, 14 Jul 2014 21:28:55 -0700 Subject: Added .coveragerc and coveralls support. This is a bit more formal method to track coverage than printing out the results in TravisCI. The way natsort is set up, 100% coverage is only possible on python3... the python2 branch does not have the problem of unorderable types. --- .coveragerc | 22 ++++++++++++++++++++++ .travis.yml | 5 ++++- 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..1bbfe9d --- /dev/null +++ b/.coveragerc @@ -0,0 +1,22 @@ +[report] +# Regexes for lines to exclude from consideration +exclude_lines = + # Have to re-enable the standard pragma + pragma: no cover + + # Don't complain if tests don't hit defensive assertion code: + raise AssertionError + raise NotImplementedError + raise$ + + # Don't complain if non-runnable code isn't run: + if 0: + if __name__ == .__main__.: + +ignore_errors = True + +# Files to not perform coverage on +omit = + natsort/__init__.* + natsort/py23compat.* + natsort/_version.* diff --git a/.travis.yml b/.travis.yml index 68055ab..1847b28 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,12 +8,15 @@ python: - pypy install: - pip install pytest-cov +- pip install coveralls - pip install wheel - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install argparse; fi script: -- python -m pytest --cov-report term-missing --cov natsort +- python -m pytest --cov 
natsort - python -m pytest --doctest-modules natsort - python -m pytest README.rst +after_success: + coveralls deploy: provider: pypi user: SethMMorton -- cgit v1.2.1 From eb08fe5b5274754cc2f3377c7a5f3cc49d9d234f Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Mon, 14 Jul 2014 21:49:45 -0700 Subject: Fixed Python 2.6 set literal bug. The set literal notation ({...}) was not introduced until Python 2.7, so I changed the set creation to set([...]). This set constant creation was moved outside the function for performance considerations. Also added a coverage badge to the README, and removed pypy from the TravisCI config file (I was getting segfaults with py.test). --- .travis.yml | 1 - README.rst | 3 +++ natsort/natsort.py | 6 +++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1847b28..4d09ad0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,7 +5,6 @@ python: - 3.2 - 3.3 - 3.4 -- pypy install: - pip install pytest-cov - pip install coveralls diff --git a/README.rst b/README.rst index a48f758..18274fd 100644 --- a/README.rst +++ b/README.rst @@ -4,6 +4,9 @@ natsort .. image:: https://travis-ci.org/SethMMorton/natsort.svg?branch=develop :target: https://travis-ci.org/SethMMorton/natsort +.. image:: https://coveralls.io/repos/SethMMorton/natsort/badge.png?branch=develop + :target: https://coveralls.io/r/SethMMorton/natsort?branch=develop + Natural sorting for python. ``natsort`` requires python version 2.6 or greater (this includes python 3.x). To run version 2.6, 3.0, or 3.1 the `argparse `_ module is required. diff --git a/natsort/natsort.py b/natsort/natsort.py index 9177396..628a607 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -50,6 +50,10 @@ regex_and_num_function_chooser = { (None, False, False) : (int_nosign_re, int), } +# Number types. I have to use set([...]) and not {...} +# because I am supporting Python 2.6. 
+number_types = set([float, int]) + def _number_finder(s, regex, numconv, py3_safe): """Helper to split numbers""" @@ -93,7 +97,7 @@ def _py3_safe(parsed_list): else: new_list = [parsed_list[0]] nl_append = new_list.append - ntypes = {float, int} + ntypes = number_types for before, after in py23_zip(islice(parsed_list, 0, len(parsed_list)-1), islice(parsed_list, 1, None)): # I realize that isinstance is favored over type, but -- cgit v1.2.1 From 26a5f6c89bbd6b01f5addbcfc4550a514cee0c6d Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Wed, 16 Jul 2014 22:02:44 -0700 Subject: Prepared for depreciation of natsort_key. The natsort_key function was changed to _natsort_key, and a new natsort_key function was created that is a wrapper for _natsort_key but also will raise a DepreciationWarning via the warnings module. --- natsort/natsort.py | 127 +++++++++++++++++++++++++++---------------- test_natsort/test_natsort.py | 84 +++++++++++++++++----------- 2 files changed, 131 insertions(+), 80 deletions(-) diff --git a/natsort/natsort.py b/natsort/natsort.py index 628a607..f8b6353 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -19,6 +19,7 @@ import sys from operator import itemgetter from functools import partial from itertools import islice +from warnings import warn from .py23compat import u_format, py23_basestring, py23_str, \ py23_range, py23_zip @@ -108,6 +109,69 @@ def _py3_safe(parsed_list): return new_list +def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, py3_safe=False): + """\ + Key to sort strings and numbers naturally. + + It works by separating out the numbers from the strings. This function for + internal use only. See the natsort_keygen documentation for details of each + parameter. 
+ + Parameters + ---------- + val : {str, unicode} + key : callable, optional + number_type : {None, float, int}, optional + signed : {True, False}, optional + exp : {True, False}, optional + py3_safe : {True, False}, optional + + Returns + ------- + out : tuple + The modified value with numbers extracted. + + """ + + # Convert the arguments to the proper input tuple + inp_options = (number_type, signed, exp) + try: + regex, num_function = regex_and_num_function_chooser[inp_options] + except KeyError: + # Report errors properly + if number_type not in (float, int) and number_type is not None: + raise ValueError("_natsort_key: 'number_type' " + "parameter '{0}' invalid".format(py23_str(number_type))) + elif signed not in (True, False): + raise ValueError("_natsort_key: 'signed' " + "parameter '{0}' invalid".format(py23_str(signed))) + elif exp not in (True, False): + raise ValueError("_natsort_key: 'exp' " + "parameter '{0}' invalid".format(py23_str(exp))) + else: + # Apply key if needed. + if key is not None: + val = key(val) + # Assume the input are strings, which is the most common case. + try: + return tuple(_number_finder(val, regex, num_function, py3_safe)) + except TypeError: + # If not strings, assume it is an iterable that must + # be parsed recursively. Do not apply the key recursively. + try: + return tuple([_natsort_key(x, None, number_type, signed, + exp, py3_safe) for x in val]) + # If there is still an error, it must be a number. + # Return as-is, with a leading empty string. + # Waiting for two raised errors instead of calling + # isinstance at the opening of the function is slower + # for numbers but much faster for strings, and since + # numbers are not a common input to natsort this is + # an acceptable sacrifice. 
+ except TypeError: + return ('', val,) + + @u_format def natsort_key(val, key=None, number_type=float, signed=True, exp=True, py3_safe=False): """\ @@ -117,13 +181,15 @@ def natsort_key(val, key=None, number_type=float, signed=True, exp=True, py3_saf It is designed for use in passing to the 'sorted' builtin or 'sort' attribute of lists. - .. note:: Depreciation Notice (3.3.1) + .. note:: Depreciation Notice (3.4.0) This function remains in the publicly exposed API for backwards-compatibility reasons, but future development - should use the newer `natsort_keygen` function. There - are no plans to officially remove this method from the - public API, but it leads to messier code than using - `natsort_keygen` so the latter should be preferred. + should use the newer `natsort_keygen` function. It is + planned to remove this from the public API in natsort + version 4.0.0. A DeprecationWarning will be raised + via the warnings module; set warnings.simplefilter("always") + to raise them to see if your code will work in version + 4.0.0. Parameters ---------- @@ -216,44 +282,9 @@ def natsort_key(val, key=None, number_type=float, signed=True, exp=True, py3_saf ({u}'', 43.0, {u}'h', 7.0, {u}'', 3.0) """ - - # Convert the arguments to the proper input tuple - inp_options = (number_type, signed, exp) - try: - regex, num_function = regex_and_num_function_chooser[inp_options] - except KeyError: - # Report errors properly - if number_type not in (float, int) and number_type is not None: - raise ValueError("natsort_key: 'number_type' " - "parameter '{0}' invalid".format(py23_str(number_type))) - elif signed not in (True, False): - raise ValueError("natsort_key: 'signed' " - "parameter '{0}' invalid".format(py23_str(signed))) - elif exp not in (True, False): - raise ValueError("natsort_key: 'exp' " - "parameter '{0}' invalid".format(py23_str(exp))) - else: - # Apply key if needed. - if key is not None: - val = key(val) - # Assume the input are strings, which is the most common case. 
- try: - return tuple(_number_finder(val, regex, num_function, py3_safe)) - except TypeError: - # If not strings, assume it is an iterable that must - # be parsed recursively. Do not apply the key recursively. - try: - return tuple([natsort_key(x, None, number_type, signed, - exp, py3_safe) for x in val]) - # If there is still an error, it must be a number. - # Return as-is, with a leading empty string. - # Waiting for two raised errors instead of calling - # isinstance at the opening of the function is slower - # for numbers but much faster for strings, and since - # numbers are not a common input to natsort this is - # an acceptable sacrifice. - except TypeError: - return ('', val,) + msg = "natsort_key is depreciated as of 3.4.0, please use natsort_keygen" + warn(msg, DeprecationWarning) + return _natsort_key(val, key, number_type, signed, exp, py3_safe) @u_format @@ -334,11 +365,11 @@ def natsort_keygen(key=None, number_type=float, signed=True, exp=True, py3_safe= True """ - return partial(natsort_key, key=key, - number_type=number_type, - signed=signed, - exp=exp, - py3_safe=py3_safe) + return partial(_natsort_key, key=key, + number_type=number_type, + signed=signed, + exp=exp, + py3_safe=py3_safe) @u_format diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py index 31c0bb9..764e611 100644 --- a/test_natsort/test_natsort.py +++ b/test_natsort/test_natsort.py @@ -3,10 +3,11 @@ Here are a collection of examples of how this module can be used. See the README or the natsort homepage for more details. 
""" +import warnings from operator import itemgetter from pytest import raises from natsort import natsorted, index_natsorted, natsort_key, versorted, index_versorted, natsort_keygen, order_by_index -from natsort.natsort import _number_finder, _py3_safe +from natsort.natsort import _number_finder, _py3_safe, _natsort_key from natsort.natsort import float_sign_exp_re, float_nosign_exp_re, float_sign_noexp_re from natsort.natsort import float_nosign_noexp_re, int_nosign_re, int_sign_re @@ -39,64 +40,83 @@ def test_py3_safe(): assert _py3_safe([5, 9]) == [5, '', 9] -def test_natsort_key(): +def test_natsort_key_private(): a = ['num3', 'num5', 'num2'] - a.sort(key=natsort_key) + a.sort(key=_natsort_key) assert a == ['num2', 'num3', 'num5'] # The below illustrates how the key works, and how the different options affect sorting. - assert natsort_key('a-5.034e1') == ('a', -50.34) - assert natsort_key('a-5.034e1', number_type=float, signed=True, exp=True) == ('a', -50.34) - assert natsort_key('a-5.034e1', number_type=float, signed=True, exp=False) == ('a', -5.034, 'e', 1.0) - assert natsort_key('a-5.034e1', number_type=float, signed=False, exp=True) == ('a-', 50.34) - assert natsort_key('a-5.034e1', number_type=float, signed=False, exp=False) == ('a-', 5.034, 'e', 1.0) - assert natsort_key('a-5.034e1', number_type=int) == ('a', -5, '.', 34, 'e', 1) - assert natsort_key('a-5.034e1', number_type=int, signed=False) == ('a-', 5, '.', 34, 'e', 1) - assert natsort_key('a-5.034e1', number_type=None) == natsort_key('a-5.034e1', number_type=int, signed=False) - assert natsort_key('a-5.034e1', key=lambda x: x.upper()) == ('A', -50.34) + assert _natsort_key('a-5.034e1') == ('a', -50.34) + assert _natsort_key('a-5.034e1', number_type=float, signed=True, exp=True) == ('a', -50.34) + assert _natsort_key('a-5.034e1', number_type=float, signed=True, exp=False) == ('a', -5.034, 'e', 1.0) + assert _natsort_key('a-5.034e1', number_type=float, signed=False, exp=True) == ('a-', 50.34) + assert 
_natsort_key('a-5.034e1', number_type=float, signed=False, exp=False) == ('a-', 5.034, 'e', 1.0) + assert _natsort_key('a-5.034e1', number_type=int) == ('a', -5, '.', 34, 'e', 1) + assert _natsort_key('a-5.034e1', number_type=int, signed=False) == ('a-', 5, '.', 34, 'e', 1) + assert _natsort_key('a-5.034e1', number_type=None) == _natsort_key('a-5.034e1', number_type=int, signed=False) + assert _natsort_key('a-5.034e1', key=lambda x: x.upper()) == ('A', -50.34) # Iterables are parsed recursively so you can sort lists of lists. - assert natsort_key(('a1', 'a-5.034e1')) == (('a', 1.0), ('a', -50.34)) - assert natsort_key(('a1', 'a-5.034e1'), number_type=None) == (('a', 1), ('a-', 5, '.', 34, 'e', 1)) + assert _natsort_key(('a1', 'a-5.034e1')) == (('a', 1.0), ('a', -50.34)) + assert _natsort_key(('a1', 'a-5.034e1'), number_type=None) == (('a', 1), ('a-', 5, '.', 34, 'e', 1)) # A key is applied before recursion, but not in the recursive calls. - assert natsort_key(('a1', 'a-5.034e1'), key=itemgetter(1)) == ('a', -50.34) + assert _natsort_key(('a1', 'a-5.034e1'), key=itemgetter(1)) == ('a', -50.34) # Strings that lead with a number get an empty string at the front of the tuple. # This is designed to get around the "unorderable types" issue. 
- assert natsort_key(('15a', '6')) == (('', 15.0, 'a'), ('', 6.0)) - assert natsort_key(10) == ('', 10) + assert _natsort_key(('15a', '6')) == (('', 15.0, 'a'), ('', 6.0)) + assert _natsort_key(10) == ('', 10) # Turn on py3_safe to put a '' between adjacent numbers - assert natsort_key('43h7+3', py3_safe=True) == ('', 43.0, 'h', 7.0, '', 3.0) + assert _natsort_key('43h7+3', py3_safe=True) == ('', 43.0, 'h', 7.0, '', 3.0) # Invalid arguments give the correct response with raises(ValueError) as err: - natsort_key('a', number_type='float') - assert str(err.value) == "natsort_key: 'number_type' parameter 'float' invalid" + _natsort_key('a', number_type='float') + assert str(err.value) == "_natsort_key: 'number_type' parameter 'float' invalid" with raises(ValueError) as err: - natsort_key('a', signed='True') - assert str(err.value) == "natsort_key: 'signed' parameter 'True' invalid" + _natsort_key('a', signed='True') + assert str(err.value) == "_natsort_key: 'signed' parameter 'True' invalid" with raises(ValueError) as err: - natsort_key('a', exp='False') - assert str(err.value) == "natsort_key: 'exp' parameter 'False' invalid" + _natsort_key('a', exp='False') + assert str(err.value) == "_natsort_key: 'exp' parameter 'False' invalid" + + +def test_natsort_key_public(): + + # Identical to _natsort_key + # But it raises a depreciation warning + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assert natsort_key('a-5.034e1') == _natsort_key('a-5.034e1') + assert len(w) == 1 + assert "natsort_key is depreciated as of 3.4.0, please use natsort_keygen" in str(w[-1].message) + assert natsort_key('a-5.034e1', number_type=float, signed=False, exp=False) == _natsort_key('a-5.034e1', number_type=float, signed=False, exp=False) + + # It is called for each element in a list when sorting + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + a = ['a2', 'a5', 'a9', 'a1', 'a4', 'a10', 'a6'] + a.sort(key=natsort_key) + 
assert len(w) == 7 def test_natsort_keygen(): # Creates equivalent natsort keys a = 'a-5.034e1' - assert natsort_keygen()(a) == natsort_key(a) - assert natsort_keygen(signed=False)(a) == natsort_key(a, signed=False) - assert natsort_keygen(exp=False)(a) == natsort_key(a, exp=False) - assert natsort_keygen(signed=False, exp=False)(a) == natsort_key(a, signed=False, exp=False) - assert natsort_keygen(number_type=int)(a) == natsort_key(a, number_type=int) - assert natsort_keygen(number_type=int, signed=False)(a) == natsort_key(a, number_type=int, signed=False) - assert natsort_keygen(number_type=None)(a) == natsort_key(a, number_type=None) + assert natsort_keygen()(a) == _natsort_key(a) + assert natsort_keygen(signed=False)(a) == _natsort_key(a, signed=False) + assert natsort_keygen(exp=False)(a) == _natsort_key(a, exp=False) + assert natsort_keygen(signed=False, exp=False)(a) == _natsort_key(a, signed=False, exp=False) + assert natsort_keygen(number_type=int)(a) == _natsort_key(a, number_type=int) + assert natsort_keygen(number_type=int, signed=False)(a) == _natsort_key(a, number_type=int, signed=False) + assert natsort_keygen(number_type=None)(a) == _natsort_key(a, number_type=None) # Custom keys are more straightforward with keygen f1 = natsort_keygen(key=lambda x: x.upper()) - f2 = lambda x: natsort_key(x, key=lambda y: y.upper()) + f2 = lambda x: _natsort_key(x, key=lambda y: y.upper()) assert f1(a) == f2(a) # It also makes sorting lists in-place easier (no lambdas!) -- cgit v1.2.1 From 0de0222fa23a6f2b2a553967d287dfc42e44d78a Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Mon, 14 Jul 2014 22:20:46 -0700 Subject: Documented changes in README. This includes changes that have not yet been made, but will be for the official release. Also uncommented all calls in profile code. 
--- README.rst | 28 ++++++++++++++++++++++++++++ test_natsort/profile_natsorted.py | 11 ++++++----- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 18274fd..e88944a 100644 --- a/README.rst +++ b/README.rst @@ -412,6 +412,34 @@ Seth M. Morton History ------- +XX-XX-2014 v. 3.4.0 +''''''''''''''''''' + + - Fixed a bug that caused user's options to the 'natsort_key' to not be + passed on to recursive calls of 'natsort_key'. + - Added a 'natsort_keygen' function that will generate a wrapped version + of 'natsort_key' that is easier to call. 'natsort_key' is now set to + depreciate at natsort version 4.0.0. + - Added an 'as_path' option to 'natsorted' and co. that will try to treat + input strings as filepaths. This will help yield correct results for + OS-generated inputs like + ``['/p/q/o.x', '/p/q (1)/o.x', '/p/q (10)/o.x', '/p/q/o (1).x']``. + - Massive performance enhancements for string input (1.8x-2.0x), at the expense + of reduction in speed for numeric input (~2.0x). + + - This is a good compromise because the most common input will be strings, + not numbers. If you are sorting only numbers, you would use 'sorted'. + - Sorting numbers still only takes 0.6x the time of sorting strings. + + - Added the 'order_by_index' function to help in using the output of + 'index_natsorted' and 'index_versorted'. + - Added the 'reverse' option to 'natsorted' and co. to make it's API more + similar to the builtin 'sorted'. + - Added more unit tests. + - Added auxillary test code that helps in profiling and stress-testing. + - Reworked the documentation, moving most of it to PyPI's hosting platform. + - Added support for coveralls.io. + 06-28-2014 v. 
3.3.0 ''''''''''''''''''' diff --git a/test_natsort/profile_natsorted.py b/test_natsort/profile_natsorted.py index 7978c23..dc52bf8 100644 --- a/test_natsort/profile_natsorted.py +++ b/test_natsort/profile_natsorted.py @@ -3,6 +3,7 @@ import cProfile import random import sys +sys.path.insert(0, '.') from natsort import natsorted, index_natsorted @@ -13,7 +14,7 @@ astr = ['a'+x+'num' for x in map(str,random.sample(xrange(10000), 1000))] tstr = [['a'+x, 'a-'+x] for x in map(str,random.sample(xrange(10000), 1000))] cstr = ['a'+x+'-'+x for x in map(str,random.sample(xrange(10000), 1000))] -''' + def prof_nums(a): print('*** Basic Call, Numbers ***') for _ in xrange(1000): @@ -26,7 +27,7 @@ def prof_num_str(a): for _ in xrange(1000): natsorted(a) cProfile.run('prof_num_str(nstr)', sort='time') -''' + def prof_str(a): print('*** Basic Call, Strings ***') @@ -34,7 +35,7 @@ def prof_str(a): natsorted(a) cProfile.run('prof_str(astr)', sort='time') -''' + def prof_str_index(a): print('*** Basic Index Call ***') for _ in xrange(1000): @@ -82,14 +83,14 @@ def prof_str_asint_unsigned(a): for _ in xrange(1000): natsorted(a, number_type=int, signed=False) cProfile.run('prof_str_asint_unsigned(astr)', sort='time') -''' + def prof_str_key(a): print('*** Basic Call With Key ***') for _ in xrange(1000): natsorted(a, key=lambda x: x.upper()) cProfile.run('prof_str_key(astr)', sort='time') -sys.exit() + def prof_str_index_key(a): print('*** Basic Index Call With Key ***') -- cgit v1.2.1 From d3bd9e4496e75625e8721e9c7eb82d5a896c7ad6 Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Wed, 16 Jul 2014 23:37:48 -0700 Subject: Added the as_path option to natsorted & co. The as_path option allows the user to specify that the input strings should be interpreted as file paths, and thus split on the path separator. It also splits on the file extensions. 
This helps sorting some OS-generated file names like "Folder (1)/" and "Folder/", where "Folder/" should come first, but without as_path it would come last. --- natsort/natsort.py | 151 ++++++++++++++++++++++++++++++++++++++----- test_natsort/test_natsort.py | 51 +++++++++++++++ 2 files changed, 187 insertions(+), 15 deletions(-) diff --git a/natsort/natsort.py b/natsort/natsort.py index f8b6353..8ac3212 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -16,6 +16,8 @@ from __future__ import print_function, division, unicode_literals, absolute_impo import re import sys +from os import curdir, pardir +from os.path import split, splitext from operator import itemgetter from functools import partial from itertools import islice @@ -55,6 +57,9 @@ regex_and_num_function_chooser = { # because I am supporting Python 2.6. number_types = set([float, int]) +# This regex is to make sure we don't mistake a number for a file extension +decimal = re.compile(r'\.\d') + def _number_finder(s, regex, numconv, py3_safe): """Helper to split numbers""" @@ -91,6 +96,44 @@ def _number_finder(s, regex, numconv, py3_safe): return _py3_safe(s) if py3_safe else s +def _path_splitter(s): + """Split a string into its path components. Assumes a string is a path.""" + path_parts = [] + p_append = path_parts.append + path_location = s + # Continue splitting the path from the back until we have reached + # '..' or '.', or until there is nothing left to split. + while path_location != curdir and path_location != pardir: + parent_path = path_location + path_location, child_path = split(parent_path) + if path_location == parent_path: + break + p_append(child_path) + # This last append is the base path. Only append if the string is non-empty. + if path_location: + p_append(path_location) + # We created this list in reversed order, so we now correct the order. + path_parts.reverse() + # Now, split off the file extensions using a similar method to above. 
+ # Continue splitting off file extensions until we reach a decimal number + # or there are no more extensions. + base = path_parts.pop() + base_parts = [] + b_append = base_parts.append + d_match = decimal.match + while True: + front = base + base, ext = splitext(front) + if d_match(ext) or not ext: + base = front # Reset base to before the split if the split is invalid. + break + b_append(ext) + b_append(base) + base_parts.reverse() + # Return the split parent paths and then the split basename. + return path_parts + base_parts + + def _py3_safe(parsed_list): """Insert '' between two numbers.""" if len(parsed_list) < 2: @@ -109,7 +152,8 @@ def _py3_safe(parsed_list): return new_list -def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, py3_safe=False): +def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, + as_path=False, py3_safe=False): """\ Key to sort strings and numbers naturally. @@ -124,6 +168,7 @@ def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, py3_sa number_type : {None, float, int}, optional signed : {True, False}, optional exp : {True, False}, optional + as_path : {True, False}, optional py3_safe : {True, False}, optional Returns @@ -152,15 +197,30 @@ def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, py3_sa # Apply key if needed. if key is not None: val = key(val) + + # If this is a path, convert it. An AttrubuteError is raised if not a string. + split_as_path = False + if as_path: + try: + val = _path_splitter(val) + except AttributeError: + pass + else: + # Record that this string was split as a path so that + # we can set as_path to False in the recursive call. + split_as_path = True + # Assume the input are strings, which is the most common case. try: return tuple(_number_finder(val, regex, num_function, py3_safe)) except TypeError: # If not strings, assume it is an iterable that must # be parsed recursively. Do not apply the key recursively. 
+ # If this string was split as a path, set as_path to False. try: return tuple([_natsort_key(x, None, number_type, signed, - exp, py3_safe) for x in val]) + exp, as_path and not split_as_path, + py3_safe) for x in val]) # If there is still an error, it must be a number. # Return as-is, with a leading empty string. # Waiting for two raised errors instead of calling @@ -173,7 +233,8 @@ def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, py3_sa @u_format -def natsort_key(val, key=None, number_type=float, signed=True, exp=True, py3_safe=False): +def natsort_key(val, key=None, number_type=float, signed=True, exp=True, + as_path=False, py3_safe=False): """\ Key to sort strings and numbers naturally. @@ -221,6 +282,15 @@ def natsort_key(val, key=None, number_type=float, signed=True, exp=True, py3_saf the number. If `exp = False`, "3.5e5" is interpreted as ``(3.5, "e", 5)``. The default behavior is `exp = True`. + as_path : {{True, False}}, optional + This option will force strings to be interpreted as filesystem + paths, so they will be split according to the filesystem separator + (i.e. '/' on UNIX, '\\' on Windows), as well as splitting on the + file extension, if any. Without this, lists of file paths like + ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted + properly; ``'Folder'`` will be placed at the end, not at the front. + The default behavior is `as_path = False`. 
+ py3_safe : {{True, False}}, optional This will make the string parsing algorithm be more careful by placing an empty string between two adjacent numbers after the @@ -284,11 +354,12 @@ def natsort_key(val, key=None, number_type=float, signed=True, exp=True, py3_saf """ msg = "natsort_key is depreciated as of 3.4.0, please use natsort_keygen" warn(msg, DeprecationWarning) - return _natsort_key(val, key, number_type, signed, exp, py3_safe) + return _natsort_key(val, key, number_type, signed, exp, as_path, py3_safe) @u_format -def natsort_keygen(key=None, number_type=float, signed=True, exp=True, py3_safe=False): +def natsort_keygen(key=None, number_type=float, signed=True, exp=True, + as_path=False, py3_safe=False): """\ Generate a key to sort strings and numbers naturally. @@ -327,6 +398,15 @@ def natsort_keygen(key=None, number_type=float, signed=True, exp=True, py3_safe= the number. If `exp = False`, "3.5e5" is interpreted as ``(3.5, "e", 5)``. The default behavior is `exp = True`. + as_path : {{True, False}}, optional + This option will force strings to be interpreted as filesystem + paths, so they will be split according to the filesystem separator + (i.e. '/' on UNIX, '\\' on Windows), as well as splitting on the + file extension, if any. Without this, lists with file paths like + ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be sorted + properly; ``'Folder'`` will be placed at the end, not at the front. + The default behavior is `as_path = False`. 
+ py3_safe : {{True, False}}, optional This will make the string parsing algorithm be more careful by placing an empty string between two adjacent numbers after the @@ -369,11 +449,13 @@ def natsort_keygen(key=None, number_type=float, signed=True, exp=True, py3_safe= number_type=number_type, signed=signed, exp=exp, + as_path=as_path, py3_safe=py3_safe) @u_format -def natsorted(seq, key=None, number_type=float, signed=True, exp=True, reverse=False): +def natsorted(seq, key=None, number_type=float, signed=True, exp=True, + reverse=False, as_path=False): """\ Sorts a sequence naturally. @@ -415,6 +497,15 @@ def natsorted(seq, key=None, number_type=float, signed=True, exp=True, reverse=F Return the list in reversed sorted order. The default is `False`. + as_path : {{True, False}}, optional + This option will force strings to be interpreted as filesystem + paths, so they will be split according to the filesystem separator + (i.e. '/' on UNIX, '\\' on Windows), as well as splitting on the + file extension, if any. Without this, lists of file paths like + ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted + properly; ``'Folder'`` will be placed at the end, not at the front. + The default behavior is `as_path = False`. + Returns ------- out: list @@ -437,21 +528,22 @@ def natsorted(seq, key=None, number_type=float, signed=True, exp=True, reverse=F try: return sorted(seq, reverse=reverse, key=natsort_keygen(key, number_type, - signed, exp)) + signed, exp, as_path)) except TypeError as e: # In the event of an unresolved "unorderable types" error # attempt to sort again, being careful to prevent this error. 
if 'unorderable types' in str(e): return sorted(seq, reverse=reverse, key=natsort_keygen(key, number_type, - signed, exp, True)) + signed, exp, as_path, + True)) else: # Re-raise if the problem was not "unorderable types" raise @u_format -def versorted(seq, key=None, reverse=False): +def versorted(seq, key=None, reverse=False, as_path=False): """\ Convenience function to sort version numbers. @@ -472,6 +564,15 @@ def versorted(seq, key=None, reverse=False): Return the list in reversed sorted order. The default is `False`. + as_path : {{True, False}}, optional + This option will force strings to be interpreted as filesystem + paths, so they will be split according to the filesystem separator + (i.e. '/' on UNIX, '\\' on Windows), as well as splitting on the + file extension, if any. Without this, lists of file paths like + ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted + properly; ``'Folder'`` will be placed at the end, not at the front. + The default behavior is `as_path = False`. + Returns ------- out : list @@ -490,11 +591,12 @@ def versorted(seq, key=None, reverse=False): [{u}'num3.4.1', {u}'num3.4.2', {u}'num4.0.2'] """ - return natsorted(seq, key, None, reverse=reverse) + return natsorted(seq, key, None, reverse=reverse, as_path=as_path) @u_format -def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True, reverse=False): +def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True, + reverse=False, as_path=False): """\ Return the list of the indexes used to sort the input sequence. @@ -537,6 +639,15 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True, rev Return the list in reversed sorted order. The default is `False`. + as_path : {{True, False}}, optional + This option will force strings to be interpreted as filesystem + paths, so they will be split according to the filesystem separator + (i.e. '/' on UNIX, '\\' on Windows), as well as splitting on the + file extension, if any. 
Without this, lists of file paths like + ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted + properly; ``'Folder'`` will be placed at the end, not at the front. + The default behavior is `as_path = False`. + Returns ------- out : tuple @@ -574,14 +685,15 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True, rev try: index_seq_pair.sort(reverse=reverse, key=natsort_keygen(newkey, number_type, - signed, exp)) + signed, exp, as_path)) except TypeError as e: # In the event of an unresolved "unorderable types" error # attempt to sort again, being careful to prevent this error. if 'unorderable types' in str(e): index_seq_pair.sort(reverse=reverse, key=natsort_keygen(newkey, number_type, - signed, exp, True)) + signed, exp, as_path, + True)) else: # Re-raise if the problem was not "unorderable types" raise @@ -589,7 +701,7 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True, rev @u_format -def index_versorted(seq, key=None, reverse=False): +def index_versorted(seq, key=None, reverse=False, as_path=False): """\ Return the list of the indexes used to sort the input sequence of version numbers. @@ -615,6 +727,15 @@ def index_versorted(seq, key=None, reverse=False): Return the list in reversed sorted order. The default is `False`. + as_path : {{True, False}}, optional + This option will force strings to be interpreted as filesystem + paths, so they will be split according to the filesystem separator + (i.e. '/' on UNIX, '\\' on Windows), as well as splitting on the + file extension, if any. Without this, lists of file paths like + ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted + properly; ``'Folder'`` will be placed at the end, not at the front. + The default behavior is `as_path = False`. 
+ Returns ------- out : tuple @@ -634,7 +755,7 @@ def index_versorted(seq, key=None, reverse=False): [1, 2, 0] """ - return index_natsorted(seq, key, None, reverse=reverse) + return index_natsorted(seq, key, None, reverse=reverse, as_path=as_path) @u_format diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py index 764e611..531350d 100644 --- a/test_natsort/test_natsort.py +++ b/test_natsort/test_natsort.py @@ -68,6 +68,15 @@ def test_natsort_key_private(): assert _natsort_key(('15a', '6')) == (('', 15.0, 'a'), ('', 6.0)) assert _natsort_key(10) == ('', 10) + # Turn on as_path to split a file path into components + assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', as_path=True) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) + assert _natsort_key('../Folder (10)/file (2).tar.gz', as_path=True) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) + assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', as_path=True) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) + + # It gracefully handles as_path for numeric input. It also handles recursion well. 
+ assert _natsort_key(10, as_path=True) == ('', 10) + assert _natsort_key(('/Folder', '/Folder (1)'), as_path=True) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1.0, ')'))) + # Turn on py3_safe to put a '' between adjacent numbers assert _natsort_key('43h7+3', py3_safe=True) == ('', 43.0, 'h', 7.0, '', 3.0) @@ -113,6 +122,7 @@ def test_natsort_keygen(): assert natsort_keygen(number_type=int)(a) == _natsort_key(a, number_type=int) assert natsort_keygen(number_type=int, signed=False)(a) == _natsort_key(a, number_type=int, signed=False) assert natsort_keygen(number_type=None)(a) == _natsort_key(a, number_type=None) + assert natsort_keygen(as_path=True)(a) == _natsort_key(a, as_path=True) # Custom keys are more straightforward with keygen f1 = natsort_keygen(key=lambda x: x.upper()) @@ -174,6 +184,20 @@ def test_natsorted(): a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] assert natsorted(a, reverse=True) == ['a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.'][::-1] + # Sorting paths just got easier + a = ['/p/Folder (10)/file.tar.gz', + '/p/Folder/file.tar.gz', + '/p/Folder (1)/file (1).tar.gz', + '/p/Folder (1)/file.tar.gz',] + assert natsorted(a) == ['/p/Folder (1)/file (1).tar.gz', + '/p/Folder (1)/file.tar.gz', + '/p/Folder (10)/file.tar.gz', + '/p/Folder/file.tar.gz',] + assert natsorted(a, as_path=True) == ['/p/Folder/file.tar.gz', + '/p/Folder (1)/file.tar.gz', + '/p/Folder (1)/file (1).tar.gz', + '/p/Folder (10)/file.tar.gz',] + def test_versorted(): @@ -183,6 +207,20 @@ def test_versorted(): a = [('a', '1.9.9a'), ('a', '1.11'), ('a', '1.9.9b'), ('a', '1.11.4'), ('a', '1.10.1')] assert versorted(a) == [('a', '1.9.9a'), ('a', '1.9.9b'), ('a', '1.10.1'), ('a', '1.11'), ('a', '1.11.4')] + # Sorting paths just got easier + a = ['/p/Folder (10)/file1.1.0.tar.gz', + '/p/Folder/file1.1.0.tar.gz', + '/p/Folder (1)/file1.1.0 (1).tar.gz', + '/p/Folder (1)/file1.1.0.tar.gz',] + assert versorted(a) == ['/p/Folder (1)/file1.1.0 (1).tar.gz', + 
'/p/Folder (1)/file1.1.0.tar.gz', + '/p/Folder (10)/file1.1.0.tar.gz', + '/p/Folder/file1.1.0.tar.gz',] + assert versorted(a, as_path=True) == ['/p/Folder/file1.1.0.tar.gz', + '/p/Folder (1)/file1.1.0.tar.gz', + '/p/Folder (1)/file1.1.0 (1).tar.gz', + '/p/Folder (10)/file1.1.0.tar.gz',] + def test_index_natsorted(): @@ -207,6 +245,12 @@ def test_index_natsorted(): data = [['a1', 'a5'], ['a1', 'a40'], ['a10', 'a1'], ['a2', 'a5']] assert index_natsorted(data) == [0, 1, 3, 2] + # It can sort paths too + a = ['/p/Folder (10)/', + '/p/Folder/', + '/p/Folder (1)/',] + assert index_natsorted(a, as_path=True) == [1, 2, 0] + def test_index_versorted(): @@ -216,6 +260,13 @@ def test_index_versorted(): a = [('a', '1.9.9a'), ('a', '1.11'), ('a', '1.9.9b'), ('a', '1.11.4'), ('a', '1.10.1')] assert index_versorted(a) == [0, 2, 4, 1, 3] + # It can sort paths too + a = ['/p/Folder (10)/file1.1.0.tar.gz', + '/p/Folder/file1.1.0.tar.gz', + '/p/Folder (1)/file1.1.0 (1).tar.gz', + '/p/Folder (1)/file1.1.0.tar.gz',] + assert index_versorted(a, as_path=True) == [1, 3, 2, 0] + def test_order_by_index(): -- cgit v1.2.1 From 67cd7f03b56d3656a73cfc9d3474a205fbf6db1e Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Thu, 17 Jul 2014 19:31:15 -0700 Subject: Numbers and strings sort with as_path. To do this, numbers had to be wrapped in an extra tuple from natsort_key when called with as_path, so that there was the same level of tuples for both strings and numbers. --- natsort/natsort.py | 2 +- test_natsort/test_natsort.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/natsort/natsort.py b/natsort/natsort.py index 8ac3212..bcbd1f0 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -229,7 +229,7 @@ def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, # numbers are not a common input to natsort this is # an acceptable sacrifice. 
except TypeError: - return ('', val,) + return (('', val,),) if as_path else ('', val,) @u_format diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py index 531350d..1afb88a 100644 --- a/test_natsort/test_natsort.py +++ b/test_natsort/test_natsort.py @@ -73,8 +73,10 @@ def test_natsort_key_private(): assert _natsort_key('../Folder (10)/file (2).tar.gz', as_path=True) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', as_path=True) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) - # It gracefully handles as_path for numeric input. It also handles recursion well. - assert _natsort_key(10, as_path=True) == ('', 10) + # It gracefully handles as_path for numeric input by putting an extra tuple around it + # so it will sort against the other as_path results. + assert _natsort_key(10, as_path=True) == (('', 10),) + # as_path also handles recursion well. assert _natsort_key(('/Folder', '/Folder (1)'), as_path=True) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1.0, ')'))) # Turn on py3_safe to put a '' between adjacent numbers @@ -198,6 +200,9 @@ def test_natsorted(): '/p/Folder (1)/file (1).tar.gz', '/p/Folder (10)/file.tar.gz',] + # You can sort paths and numbers, not that you'd want to + assert natsorted(['/Folder (9)/file.exe', 43], as_path=True) == [43, '/Folder (9)/file.exe'] + def test_versorted(): -- cgit v1.2.1 From e85e785653ee3a61c842febdd7289869c8f09d74 Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Thu, 17 Jul 2014 19:43:39 -0700 Subject: Added the --paths option to the CL tool. This option will trigger the as_path option to natsorted. This will make the natsort command line tool much more useful, since sorting file paths is likely a common task for natsort. 
--- natsort/__main__.py | 10 ++++++++- test_natsort/test_main.py | 52 ++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/natsort/__main__.py b/natsort/__main__.py index ebc4300..6f81b17 100644 --- a/natsort/__main__.py +++ b/natsort/__main__.py @@ -23,6 +23,13 @@ def main(): formatter_class=RawDescriptionHelpFormatter) parser.add_argument('--version', action='version', version='%(prog)s {0}'.format(__version__)) + parser.add_argument('-p', '--paths', default=False, action='store_true', + help='Interpret the input as file paths. This is not ' + 'strictly necessary to sort all file paths, but in cases ' + 'where there are OS-generated file paths like "Folder/" ' + 'and "Folder (1)/", this option is needed to make the ' + 'paths sorted in the order you expect ("Folder/" before ' + '"Folder (1)/").') parser.add_argument('-f', '--filter', help='Used for ' 'keeping only the entries that have a number ' 'falling in the given range.', nargs=2, type=float, @@ -51,7 +58,7 @@ def main(): dest='exp', help='Do not consider an exponential as part ' 'of a number, i.e. 1e4, would be considered as 1, "e", ' 'and 4, not as 10000. This only effects the ' - '--number_type=float.') + '--number-type=float.') parser.add_argument('entries', help='The entries to sort. 
Taken from stdin ' 'if nothing is given on the command line.', nargs='*', default=sys.stdin) @@ -131,6 +138,7 @@ def sort_and_print_entries(entries, args): 'float': float}[args.number_type], 'signed': args.signed, 'exp': args.exp, + 'as_path': args.paths, 'reverse': args.reverse,} # Pre-remove entries that don't pass the filtering criteria diff --git a/test_natsort/test_main.py b/test_natsort/test_main.py index c8a5825..d236d2d 100644 --- a/test_natsort/test_main.py +++ b/test_natsort/test_main.py @@ -127,6 +127,24 @@ a5.3 a453.6 """ + # To sort complicated filenames you need --paths + sys.argv[1:] = ['/Folder (1)/', '/Folder/', '/Folder (10)/'] + main() + out, __ = capsys.readouterr() + assert out == """\ +/Folder (1)/ +/Folder (10)/ +/Folder/ +""" + sys.argv[1:] = ['--paths', '/Folder (1)/', '/Folder/', '/Folder (10)/'] + main() + out, __ = capsys.readouterr() + assert out == """\ +/Folder/ +/Folder (1)/ +/Folder (10)/ +""" + def test_range_check(): @@ -177,7 +195,7 @@ def test_sort_and_print_entries(capsys): class Args: """A dummy class to simulate the argparse Namespace object""" - def __init__(self, filter, reverse_filter, exclude, reverse): + def __init__(self, filter, reverse_filter, exclude, as_path, reverse): self.filter = filter self.reverse_filter = reverse_filter self.exclude = exclude @@ -185,19 +203,35 @@ def test_sort_and_print_entries(capsys): self.number_type = 'float' self.signed = True self.exp = True + self.paths = as_path entries = ['tmp/a57/path2', 'tmp/a23/path1', 'tmp/a1/path1', + 'tmp/a1 (1)/path1', 'tmp/a130/path1', 'tmp/a64/path1', 'tmp/a64/path2'] # Just sort the paths - sort_and_print_entries(entries, Args(None, None, False, False)) + sort_and_print_entries(entries, Args(None, None, False, False, False)) + out, __ = capsys.readouterr() + assert out == """\ +tmp/a1 (1)/path1 +tmp/a1/path1 +tmp/a23/path1 +tmp/a57/path2 +tmp/a64/path1 +tmp/a64/path2 +tmp/a130/path1 +""" + + # You would use --paths to make them sort as paths when the OS 
makes duplicates + sort_and_print_entries(entries, Args(None, None, False, True, False)) out, __ = capsys.readouterr() assert out == """\ tmp/a1/path1 +tmp/a1 (1)/path1 tmp/a23/path1 tmp/a57/path2 tmp/a64/path1 @@ -206,7 +240,7 @@ tmp/a130/path1 """ # Sort the paths with numbers between 20-100 - sort_and_print_entries(entries, Args([(20, 100)], None, False, False)) + sort_and_print_entries(entries, Args([(20, 100)], None, False, False, False)) out, __ = capsys.readouterr() assert out == """\ tmp/a23/path1 @@ -216,27 +250,30 @@ tmp/a64/path2 """ # Sort the paths without numbers between 20-100 - sort_and_print_entries(entries, Args(None, [(20, 100)], False, False)) + sort_and_print_entries(entries, Args(None, [(20, 100)], False, True, False)) out, __ = capsys.readouterr() assert out == """\ tmp/a1/path1 +tmp/a1 (1)/path1 tmp/a130/path1 """ # Sort the paths, excluding 23 and 130 - sort_and_print_entries(entries, Args(None, None, [23, 130], False)) + sort_and_print_entries(entries, Args(None, None, [23, 130], True, False)) out, __ = capsys.readouterr() assert out == """\ tmp/a1/path1 +tmp/a1 (1)/path1 tmp/a57/path2 tmp/a64/path1 tmp/a64/path2 """ # Sort the paths, excluding 2 - sort_and_print_entries(entries, Args(None, None, [2], False)) + sort_and_print_entries(entries, Args(None, None, [2], False, False)) out, __ = capsys.readouterr() assert out == """\ +tmp/a1 (1)/path1 tmp/a1/path1 tmp/a23/path1 tmp/a64/path1 @@ -244,7 +281,7 @@ tmp/a130/path1 """ # Sort in reverse order - sort_and_print_entries(entries, Args(None, None, False, True)) + sort_and_print_entries(entries, Args(None, None, False, True, True)) out, __ = capsys.readouterr() assert out == """\ tmp/a130/path1 @@ -252,5 +289,6 @@ tmp/a64/path2 tmp/a64/path1 tmp/a57/path2 tmp/a23/path1 +tmp/a1 (1)/path1 tmp/a1/path1 """ -- cgit v1.2.1 From 65778c9054703de6e6e242998c674181fdb6d671 Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Thu, 17 Jul 2014 19:32:15 -0700 Subject: Added a stress tester. 
This stress tester generates random strings and tries to sort them. --- .travis.yml | 1 + test_natsort/stress_natsort.py | 51 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 test_natsort/stress_natsort.py diff --git a/.travis.yml b/.travis.yml index 4d09ad0..f8c1cba 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,6 +14,7 @@ script: - python -m pytest --cov natsort - python -m pytest --doctest-modules natsort - python -m pytest README.rst +- python -m pytest test_natsort/stress_natsort.py after_success: coveralls deploy: diff --git a/test_natsort/stress_natsort.py b/test_natsort/stress_natsort.py new file mode 100644 index 0000000..b4394e8 --- /dev/null +++ b/test_natsort/stress_natsort.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +"""\ +This file contains functions to stress-test natsort. +""" +import sys +import random +import string +import copy +from pytest import fail +from natsort import natsorted +from natsort.py23compat import py23_range + + +def test_random(): + """Try to sort 1,000,000 randomly generated strings without exception.""" + + # Repeat test 1,000,000 times + for _ in py23_range(1000000): + # Made a list of five randomly generated strings + lst = [''.join(random.sample(string.printable, random.randint(7, 30))) for __ in py23_range(5)] + # Try to sort. If there is an exception, give some detailed info. + try: + natsorted(lst) + except Exception as e: + msg = "Ended with exception type '{exc}: {msg}'.\n" + msg += "Failed on the input {lst}." 
+ fail(msg.format(exc=type(e).__name__, msg=str(e), lst=str(lst))) + + +def test_similar(): + """Try to sort 1,000,000 randomly generated similar strings without exception.""" + + # Repeat test 1,000,000 times + for _ in py23_range(1000000): + # Create a randomly generated string + base = random.sample(string.printable, random.randint(7, 30)) + # Make a list of strings based on this string, with some randomly generated modifications + lst = [] + for __ in py23_range(5): + new_str = copy.copy(base) + for ___ in py23_range(random.randint(1,5)): + new_str[random.randint(0,len(base)-1)] = random.choice(string.printable) + lst.append(''.join(new_str)) + # Try to sort. If there is an exception, give some detailed info. + try: + natsorted(lst) + except Exception as e: + msg = "Ended with exception type '{exc}: {msg}'.\n" + msg += "Failed on the input {lst}." + fail(msg.format(exc=type(e).__name__, msg=str(e), lst=str(lst))) + -- cgit v1.2.1 From dd6f05255d756468ae13afaa14f08cb6f500051a Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Thu, 17 Jul 2014 19:34:42 -0700 Subject: Package now passes pyflakes tests. This was mostly import problems. The setup.cfg has been updated with options to refine what is checked. 
--- natsort/__main__.py | 2 -- natsort/natsort.py | 4 +--- setup.cfg | 7 +++++++ test_natsort/profile_natsorted.py | 42 ++++++++++++++++++++++----------------- 4 files changed, 32 insertions(+), 23 deletions(-) diff --git a/natsort/__main__.py b/natsort/__main__.py index 6f81b17..e7c3789 100644 --- a/natsort/__main__.py +++ b/natsort/__main__.py @@ -2,8 +2,6 @@ from __future__ import print_function, division, unicode_literals, absolute_import import sys -import os -import re from .natsort import natsorted, regex_and_num_function_chooser from ._version import __version__ diff --git a/natsort/natsort.py b/natsort/natsort.py index bcbd1f0..7e82c4c 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -15,7 +15,6 @@ See the README or the natsort homepage for more details. from __future__ import print_function, division, unicode_literals, absolute_import import re -import sys from os import curdir, pardir from os.path import split, splitext from operator import itemgetter @@ -23,8 +22,7 @@ from functools import partial from itertools import islice from warnings import warn -from .py23compat import u_format, py23_basestring, py23_str, \ - py23_range, py23_zip +from .py23compat import u_format, py23_str, py23_zip __doc__ = u_format(__doc__) # Make sure the doctest works for either # python2 or python3 diff --git a/setup.cfg b/setup.cfg index 7c65a3f..f9b23b8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,3 +3,10 @@ universal = 1 [sdist] formats = zip,gztar + +[pytest] +flakes-ignore = + test_natsort/* ALL + natsort/py23compat.py UndefinedName + natsort/__init__.py UnusedImport + setup.py ALL diff --git a/test_natsort/profile_natsorted.py b/test_natsort/profile_natsorted.py index dc52bf8..5140cc1 100644 --- a/test_natsort/profile_natsorted.py +++ b/test_natsort/profile_natsorted.py @@ -1,3 +1,8 @@ +# -*- coding: utf-8 -*- +"""\ +This file contains functions to profile natsorted with different +inputs and different settings. 
+""" from __future__ import print_function import cProfile import random @@ -5,103 +10,104 @@ import sys sys.path.insert(0, '.') from natsort import natsorted, index_natsorted +from natsort.py23compat import py23_range # Sample lists to sort -nums = random.sample(xrange(10000), 1000) -nstr = list(map(str,random.sample(xrange(10000), 1000))) -astr = ['a'+x+'num' for x in map(str,random.sample(xrange(10000), 1000))] -tstr = [['a'+x, 'a-'+x] for x in map(str,random.sample(xrange(10000), 1000))] -cstr = ['a'+x+'-'+x for x in map(str,random.sample(xrange(10000), 1000))] +nums = random.sample(py23_range(10000), 1000) +nstr = list(map(str,random.sample(py23_range(10000), 1000))) +astr = ['a'+x+'num' for x in map(str,random.sample(py23_range(10000), 1000))] +tstr = [['a'+x, 'a-'+x] for x in map(str,random.sample(py23_range(10000), 1000))] +cstr = ['a'+x+'-'+x for x in map(str,random.sample(py23_range(10000), 1000))] def prof_nums(a): print('*** Basic Call, Numbers ***') - for _ in xrange(1000): + for _ in py23_range(1000): natsorted(a) cProfile.run('prof_nums(nums)', sort='time') def prof_num_str(a): print('*** Basic Call, Numbers as Strings ***') - for _ in xrange(1000): + for _ in py23_range(1000): natsorted(a) cProfile.run('prof_num_str(nstr)', sort='time') def prof_str(a): print('*** Basic Call, Strings ***') - for _ in xrange(1000): + for _ in py23_range(1000): natsorted(a) cProfile.run('prof_str(astr)', sort='time') def prof_str_index(a): print('*** Basic Index Call ***') - for _ in xrange(1000): + for _ in py23_range(1000): index_natsorted(a) cProfile.run('prof_str_index(astr)', sort='time') def prof_nested(a): print('*** Basic Call, Nested Strings ***') - for _ in xrange(1000): + for _ in py23_range(1000): natsorted(a) cProfile.run('prof_nested(tstr)', sort='time') def prof_str_noexp(a): print('*** No-Exp Call ***') - for _ in xrange(1000): + for _ in py23_range(1000): natsorted(a, exp=False) cProfile.run('prof_str_noexp(astr)', sort='time') def 
prof_str_unsigned(a): print('*** Unsigned Call ***') - for _ in xrange(1000): + for _ in py23_range(1000): natsorted(a, signed=False) cProfile.run('prof_str_unsigned(astr)', sort='time') def prof_str_unsigned_noexp(a): print('*** Unsigned No-Exp Call ***') - for _ in xrange(1000): + for _ in py23_range(1000): natsorted(a, signed=False, exp=False) cProfile.run('prof_str_unsigned_noexp(astr)', sort='time') def prof_str_asint(a): print('*** Int Call ***') - for _ in xrange(1000): + for _ in py23_range(1000): natsorted(a, number_type=int) cProfile.run('prof_str_asint(astr)', sort='time') def prof_str_asint_unsigned(a): print('*** Unsigned Int (Versions) Call ***') - for _ in xrange(1000): + for _ in py23_range(1000): natsorted(a, number_type=int, signed=False) cProfile.run('prof_str_asint_unsigned(astr)', sort='time') def prof_str_key(a): print('*** Basic Call With Key ***') - for _ in xrange(1000): + for _ in py23_range(1000): natsorted(a, key=lambda x: x.upper()) cProfile.run('prof_str_key(astr)', sort='time') def prof_str_index_key(a): print('*** Basic Index Call With Key ***') - for _ in xrange(1000): + for _ in py23_range(1000): index_natsorted(a, key=lambda x: x.upper()) cProfile.run('prof_str_index_key(astr)', sort='time') def prof_str_unorderable(a): print('*** Basic Index Call, "Unorderable" ***') - for _ in xrange(1000): + for _ in py23_range(1000): natsorted(a) cProfile.run('prof_str_unorderable(cstr)', sort='time') -- cgit v1.2.1 From 517c3a28d0b8b7e54d6a6c07ad08c615dc9d150b Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Thu, 17 Jul 2014 20:53:12 -0700 Subject: Package now passes pep8 tests. The test_natsort.py script has some line length rules ignored, but otherwise all rules are obeyed.
--- .travis.yml | 4 +- natsort/__init__.py | 26 ++++----- natsort/__main__.py | 116 +++++++++++++++++++++----------------- natsort/_version.py | 3 +- natsort/natsort.py | 106 ++++++++++++++++++---------------- natsort/py23compat.py | 12 ++-- setup.cfg | 5 +- setup.py | 34 +++++------ test_natsort/profile_natsorted.py | 10 ++-- test_natsort/stress_natsort.py | 28 ++++----- test_natsort/test_main.py | 27 +++++---- test_natsort/test_natsort.py | 39 +++++++------ 12 files changed, 221 insertions(+), 189 deletions(-) diff --git a/.travis.yml b/.travis.yml index f8c1cba..c6f70f9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,12 +6,12 @@ python: - 3.3 - 3.4 install: -- pip install pytest-cov +- pip install pytest-cov pytest-flakes pytest-pep8 - pip install coveralls - pip install wheel - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install argparse; fi script: -- python -m pytest --cov natsort +- python -m pytest --cov natsort --flakes --pep8 - python -m pytest --doctest-modules natsort - python -m pytest README.rst - python -m pytest test_natsort/stress_natsort.py diff --git a/natsort/__init__.py b/natsort/__init__.py index f75b09b..ac8171d 100644 --- a/natsort/__init__.py +++ b/natsort/__init__.py @@ -1,18 +1,18 @@ # -*- coding: utf-8 -*- -from __future__ import print_function, division, unicode_literals, absolute_import +from __future__ import (print_function, division, + unicode_literals, absolute_import) -from .natsort import natsort_key, natsort_keygen, natsorted, \ - index_natsorted, versorted, index_versorted, \ - order_by_index +from .natsort import (natsort_key, natsort_keygen, natsorted, + index_natsorted, versorted, index_versorted, + order_by_index) from ._version import __version__ __all__ = [ - 'natsort_key', - 'natsort_keygen', - 'natsorted', - 'versorted' - 'index_natsorted', - 'index_versorted', - 'order_by_index', - ] - + 'natsort_key', + 'natsort_keygen', + 'natsorted', + 'versorted' + 'index_natsorted', + 'index_versorted', + 'order_by_index', 
+] diff --git a/natsort/__main__.py b/natsort/__main__.py index e7c3789..af8ef63 100644 --- a/natsort/__main__.py +++ b/natsort/__main__.py @@ -1,9 +1,10 @@ # -*- coding: utf-8 -*- -from __future__ import print_function, division, unicode_literals, absolute_import +from __future__ import (print_function, division, + unicode_literals, absolute_import) import sys -from .natsort import natsorted, regex_and_num_function_chooser +from .natsort import natsorted, regex_and_num_function_chooser from ._version import __version__ from .py23compat import py23_str @@ -21,45 +22,50 @@ def main(): formatter_class=RawDescriptionHelpFormatter) parser.add_argument('--version', action='version', version='%(prog)s {0}'.format(__version__)) - parser.add_argument('-p', '--paths', default=False, action='store_true', - help='Interpret the input as file paths. This is not ' - 'strictly necessary to sort all file paths, but in cases ' - 'where there are OS-generated file paths like "Folder/" ' - 'and "Folder (1)/", this option is needed to make the ' - 'paths sorted in the order you expect ("Folder/" before ' - '"Folder (1)/").') - parser.add_argument('-f', '--filter', help='Used for ' - 'keeping only the entries that have a number ' - 'falling in the given range.', nargs=2, type=float, - metavar=('LOW', 'HIGH'), action='append') - parser.add_argument('-F', '--reverse-filter', help='Used for ' - 'excluding the entries that have a number ' - 'falling in the given range.', nargs=2, type=float, - metavar=('LOW', 'HIGH'), action='append', - dest='reverse_filter') - parser.add_argument('-e', '--exclude', type=float, action='append', - help='Used to exclude an entry ' - 'that contains a specific number.') - parser.add_argument('-r', '--reverse', help='Returns in reversed order.', - action='store_true', default=False) - parser.add_argument('-t', '--number-type', '--number_type', dest='number_type', - choices=('digit', 'int', 'float', 'version', 'ver'), - default='float', help='Choose the type of 
number ' - 'to search for. "float" will search for floating-point ' - 'numbers. "int" will only search for integers. ' - '"digit", "version", and "ver" are shortcuts for "int" ' - 'with --nosign.') - parser.add_argument('--nosign', default=True, action='store_false', - dest='signed', help='Do not consider "+" or "-" as part ' - 'of a number, i.e. do not take sign into consideration.') - parser.add_argument('--noexp', default=True, action='store_false', - dest='exp', help='Do not consider an exponential as part ' - 'of a number, i.e. 1e4, would be considered as 1, "e", ' - 'and 4, not as 10000. This only effects the ' - '--number-type=float.') - parser.add_argument('entries', help='The entries to sort. Taken from stdin ' - 'if nothing is given on the command line.', nargs='*', - default=sys.stdin) + parser.add_argument( + '-p', '--paths', default=False, action='store_true', + help='Interpret the input as file paths. This is not ' + 'strictly necessary to sort all file paths, but in cases ' + 'where there are OS-generated file paths like "Folder/" ' + 'and "Folder (1)/", this option is needed to make the ' + 'paths sorted in the order you expect ("Folder/" before ' + '"Folder (1)/").') + parser.add_argument( + '-f', '--filter', nargs=2, type=float, metavar=('LOW', 'HIGH'), + action='append', + help='Used for keeping only the entries that have a number ' + 'falling in the given range.') + parser.add_argument( + '-F', '--reverse-filter', nargs=2, type=float, + metavar=('LOW', 'HIGH'), action='append', dest='reverse_filter', + help='Used for excluding the entries that have a number ' + 'falling in the given range.') + parser.add_argument( + '-e', '--exclude', type=float, action='append', + help='Used to exclude an entry that contains a specific number.') + parser.add_argument( + '-r', '--reverse', action='store_true', default=False, + help='Returns in reversed order.') + parser.add_argument( + '-t', '--number-type', '--number_type', dest='number_type', + 
choices=('digit', 'int', 'float', 'version', 'ver'), default='float', + help='Choose the type of number to search for. "float" will search ' + 'for floating-point numbers. "int" will only search for ' + 'integers. "digit", "version", and "ver" are shortcuts for "int" ' + 'with --nosign.') + parser.add_argument( + '--nosign', default=True, action='store_false', dest='signed', + help='Do not consider "+" or "-" as part of a number, i.e. do not ' + 'take sign into consideration.') + parser.add_argument( + '--noexp', default=True, action='store_false', dest='exp', + help='Do not consider an exponential as part of a number, i.e. 1e4, ' + 'would be considered as 1, "e", and 4, not as 10000. This only ' + 'effects the --number-type=float.') + parser.add_argument( + 'entries', nargs='*', default=sys.stdin, + help='The entries to sort. Taken from stdin if nothing is given on ' + 'the command line.', ) args = parser.parse_args() # Make sure the filter range is given properly. Does nothing if no filter @@ -110,8 +116,8 @@ def keep_entry_range(entry, lows, highs, converter, regex): and False if it is not in the range and should not be kept. """ return any(low <= converter(num) <= high - for num in regex.findall(entry) - for low, high in zip(lows, highs)) + for num in regex.findall(entry) + for low, high in zip(lows, highs)) def exclude_entry(entry, values, converter, regex): @@ -137,25 +143,32 @@ def sort_and_print_entries(entries, args): 'signed': args.signed, 'exp': args.exp, 'as_path': args.paths, - 'reverse': args.reverse,} + 'reverse': args.reverse, } # Pre-remove entries that don't pass the filtering criteria - # Make sure we use the same searching algorithm for filtering as for sorting. - if args.filter is not None or args.reverse_filter is not None or args.exclude: + # Make sure we use the same searching algorithm for filtering + # as for sorting. 
+ do_filter = args.filter is not None or args.reverse_filter is not None + if do_filter or args.exclude: inp_options = (kwargs['number_type'], args.signed, args.exp) regex, num_function = regex_and_num_function_chooser[inp_options] if args.filter is not None: - lows, highs = [f[0] for f in args.filter], [f[1] for f in args.filter] + lows, highs = ([f[0] for f in args.filter], + [f[1] for f in args.filter]) entries = [entry for entry in entries - if keep_entry_range(entry, lows, highs, num_function, regex)] + if keep_entry_range(entry, lows, highs, + num_function, regex)] if args.reverse_filter is not None: - lows, highs = [f[0] for f in args.reverse_filter], [f[1] for f in args.reverse_filter] + lows, highs = ([f[0] for f in args.reverse_filter], + [f[1] for f in args.reverse_filter]) entries = [entry for entry in entries - if not keep_entry_range(entry, lows, highs, num_function, regex)] + if not keep_entry_range(entry, lows, highs, + num_function, regex)] if args.exclude: exclude = set(args.exclude) entries = [entry for entry in entries - if exclude_entry(entry, exclude, num_function, regex)] + if exclude_entry(entry, exclude, + num_function, regex)] # Print off the sorted results for entry in natsorted(entries, **kwargs): @@ -169,4 +182,3 @@ if __name__ == '__main__': sys.exit(py23_str(a)) except KeyboardInterrupt: sys.exit(1) - diff --git a/natsort/_version.py b/natsort/_version.py index de2a514..1c3ba50 100644 --- a/natsort/_version.py +++ b/natsort/_version.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- -from __future__ import print_function, division, unicode_literals, absolute_import +from __future__ import (print_function, division, + unicode_literals, absolute_import) __version__ = '3.3.0' diff --git a/natsort/natsort.py b/natsort/natsort.py index 7e82c4c..bb26ebe 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -12,7 +12,8 @@ See the README or the natsort homepage for more details. 
""" -from __future__ import print_function, division, unicode_literals, absolute_import +from __future__ import (print_function, division, + unicode_literals, absolute_import) import re from os import curdir, pardir @@ -24,8 +25,8 @@ from warnings import warn from .py23compat import u_format, py23_str, py23_zip -__doc__ = u_format(__doc__) # Make sure the doctest works for either - # python2 or python3 +__doc__ = u_format(__doc__) # Make sure the doctest works for either + # python2 or python3 # The regex that locates floats float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)') @@ -37,18 +38,18 @@ int_nosign_re = re.compile(r'(\d+)') int_sign_re = re.compile(r'([-+]?\d+)') # This dict will help select the correct regex and number conversion function. regex_and_num_function_chooser = { - (float, True, True) : (float_sign_exp_re, float), - (float, True, False) : (float_sign_noexp_re, float), - (float, False, True) : (float_nosign_exp_re, float), - (float, False, False) : (float_nosign_noexp_re, float), - (int, True, True) : (int_sign_re, int), - (int, True, False) : (int_sign_re, int), - (int, False, True) : (int_nosign_re, int), - (int, False, False) : (int_nosign_re, int), - (None, True, True) : (int_nosign_re, int), - (None, True, False) : (int_nosign_re, int), - (None, False, True) : (int_nosign_re, int), - (None, False, False) : (int_nosign_re, int), + (float, True, True): (float_sign_exp_re, float), + (float, True, False): (float_sign_noexp_re, float), + (float, False, True): (float_nosign_exp_re, float), + (float, False, False): (float_nosign_noexp_re, float), + (int, True, True): (int_sign_re, int), + (int, True, False): (int_sign_re, int), + (int, False, True): (int_nosign_re, int), + (int, False, False): (int_nosign_re, int), + (None, True, True): (int_nosign_re, int), + (None, True, False): (int_nosign_re, int), + (None, False, True): (int_nosign_re, int), + (None, False, False): (int_nosign_re, int), } # Number types. 
I have to use set([...]) and not {...} @@ -83,7 +84,7 @@ def _number_finder(s, regex, numconv, py3_safe): # using isinstance. This was chosen at the expense of the less # common case of a number being at the front of the list. try: - s[0][0] # str supports indexing, but not numbers + s[0][0] # str supports indexing, but not numbers except TypeError: s = [''] + s @@ -91,7 +92,7 @@ def _number_finder(s, regex, numconv, py3_safe): # and is used to get around "unorderable types" in complex cases. # It is a separate function that needs to be requested specifically # because it is expensive to call. - return _py3_safe(s) if py3_safe else s + return _py3_safe(s) if py3_safe else s def _path_splitter(s): @@ -107,7 +108,8 @@ def _path_splitter(s): if path_location == parent_path: break p_append(child_path) - # This last append is the base path. Only append if the string is non-empty. + # This last append is the base path. + # Only append if the string is non-empty. if path_location: p_append(path_location) # We created this list in reversed order, so we now correct the order. @@ -123,7 +125,8 @@ def _path_splitter(s): front = base base, ext = splitext(front) if d_match(ext) or not ext: - base = front # Reset base to before the split if the split is invalid. + # Reset base to before the split if the split is invalid. + base = front break b_append(ext) b_append(base) @@ -134,13 +137,14 @@ def _path_splitter(s): def _py3_safe(parsed_list): """Insert '' between two numbers.""" - if len(parsed_list) < 2: + length = len(parsed_list) + if length < 2: return parsed_list else: new_list = [parsed_list[0]] nl_append = new_list.append ntypes = number_types - for before, after in py23_zip(islice(parsed_list, 0, len(parsed_list)-1), + for before, after in py23_zip(islice(parsed_list, 0, length-1), islice(parsed_list, 1, None)): # I realize that isinstance is favored over type, but # in this case type is SO MUCH FASTER than isinstance!! 
@@ -151,7 +155,7 @@ def _py3_safe(parsed_list): def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, - as_path=False, py3_safe=False): + as_path=False, py3_safe=False): """\ Key to sort strings and numbers naturally. @@ -175,7 +179,7 @@ def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, The modified value with numbers extracted. """ - + # Convert the arguments to the proper input tuple inp_options = (number_type, signed, exp) try: @@ -183,20 +187,21 @@ def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, except KeyError: # Report errors properly if number_type not in (float, int) and number_type is not None: - raise ValueError("_natsort_key: 'number_type' " - "parameter '{0}' invalid".format(py23_str(number_type))) + raise ValueError("_natsort_key: 'number_type' parameter " + "'{0}' invalid".format(py23_str(number_type))) elif signed not in (True, False): - raise ValueError("_natsort_key: 'signed' " - "parameter '{0}' invalid".format(py23_str(signed))) + raise ValueError("_natsort_key: 'signed' parameter " + "'{0}' invalid".format(py23_str(signed))) elif exp not in (True, False): - raise ValueError("_natsort_key: 'exp' " - "parameter '{0}' invalid".format(py23_str(exp))) + raise ValueError("_natsort_key: 'exp' parameter " + "'{0}' invalid".format(py23_str(exp))) else: # Apply key if needed. if key is not None: val = key(val) - # If this is a path, convert it. An AttrubuteError is raised if not a string. + # If this is a path, convert it. + # An AttrubuteError is raised if not a string. split_as_path = False if as_path: try: @@ -217,8 +222,8 @@ def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, # If this string was split as a path, set as_path to False. 
try: return tuple([_natsort_key(x, None, number_type, signed, - exp, as_path and not split_as_path, - py3_safe) for x in val]) + exp, as_path and not split_as_path, + py3_safe) for x in val]) # If there is still an error, it must be a number. # Return as-is, with a leading empty string. # Waiting for two raised errors instead of calling @@ -232,7 +237,7 @@ def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, @u_format def natsort_key(val, key=None, number_type=float, signed=True, exp=True, - as_path=False, py3_safe=False): + as_path=False, py3_safe=False): """\ Key to sort strings and numbers naturally. @@ -437,23 +442,25 @@ def natsort_keygen(key=None, number_type=float, signed=True, exp=True, >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] >>> b = a[:] - >>> a.sort(key=lambda x: natsort_key(x, key=lambda y: y.upper(), signed=False)) + >>> a.sort(key=lambda x: natsort_key(x, key=lambda y: y.upper(), + ... signed=False)) >>> b.sort(key=natsort_keygen(key=lambda x: x.upper(), signed=False)) >>> a == b True """ - return partial(_natsort_key, key=key, - number_type=number_type, - signed=signed, - exp=exp, - as_path=as_path, - py3_safe=py3_safe) + return partial(_natsort_key, + key=key, + number_type=number_type, + signed=signed, + exp=exp, + as_path=as_path, + py3_safe=py3_safe) @u_format def natsorted(seq, key=None, number_type=float, signed=True, exp=True, - reverse=False, as_path=False): + reverse=False, as_path=False): """\ Sorts a sequence naturally. @@ -594,7 +601,7 @@ def versorted(seq, key=None, reverse=False, as_path=False): @u_format def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True, - reverse=False, as_path=False): + reverse=False, as_path=False): """\ Return the list of the indexes used to sort the input sequence. 
@@ -677,7 +684,7 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True, if key is None: newkey = itemgetter(1) else: - newkey = lambda x : key(itemgetter(1)(x)) + newkey = lambda x: key(itemgetter(1)(x)) # Pair the index and sequence together, then sort by element index_seq_pair = [[x, y] for x, y in enumerate(seq)] try: @@ -760,29 +767,29 @@ def index_versorted(seq, key=None, reverse=False, as_path=False): def order_by_index(seq, index, iter=False): """\ Order a given sequence by an index sequence. - + The output of `index_natsorted` and `index_versorted` is a sequence of integers (index) that correspond to how its input sequence **would** be sorted. The idea is that this index can be used to reorder multiple sequences by the sorted order of the first sequence. This function is a convenient wrapper to apply this ordering to a sequence. - + Parameters ---------- seq : iterable The sequence to order. - + index : iterable The sequence that indicates how to order `seq`. It should be the same length as `seq` and consist of integers only. - + iter : {{True, False}}, optional If `True`, the ordered sequence is returned as a generator expression; otherwise it is returned as a list. The default is `False`. 
- + Returns ------- out : {{list, generator}} @@ -793,7 +800,7 @@ def order_by_index(seq, index, iter=False): -------- index_natsorted index_versorted - + Examples -------- @@ -813,4 +820,3 @@ def order_by_index(seq, index, iter=False): """ return (seq[i] for i in index) if iter else [seq[i] for i in index] - diff --git a/natsort/py23compat.py b/natsort/py23compat.py index 85c06e1..3f3fb92 100644 --- a/natsort/py23compat.py +++ b/natsort/py23compat.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- -from __future__ import print_function, division, unicode_literals, absolute_import +from __future__ import (print_function, division, + unicode_literals, absolute_import) import functools import sys @@ -36,9 +37,9 @@ def _modify_str_or_docstring(str_change_func): else: func = func_or_str doc = func.__doc__ - + doc = str_change_func(doc) - + if func: func.__doc__ = doc return func @@ -52,7 +53,7 @@ if sys.version[0] == '3': @_modify_str_or_docstring def u_format(s): """"{u}'abc'" --> "'abc'" (Python 3) - + Accepts a string or a function, so it can be used as a decorator.""" return s.format(u='') else: @@ -60,7 +61,6 @@ else: @_modify_str_or_docstring def u_format(s): """"{u}'abc'" --> "u'abc'" (Python 2) - + Accepts a string or a function, so it can be used as a decorator.""" return s.format(u='u') - diff --git a/setup.cfg b/setup.cfg index f9b23b8..98a5fe7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,7 +6,8 @@ formats = zip,gztar [pytest] flakes-ignore = - test_natsort/* ALL natsort/py23compat.py UndefinedName natsort/__init__.py UnusedImport - setup.py ALL + +pep8ignore = + test_natsort/test_natsort.py E501 E241 E221 diff --git a/setup.py b/setup.py index 925853a..e663cd1 100644 --- a/setup.py +++ b/setup.py @@ -55,21 +55,21 @@ REQUIRES = 'argparse' if sys.version[:3] in ('2.6', '3.0', '3.1') else '' # The setup parameters -setup(name='natsort', - version=VERSION, - author='Seth M. 
Morton', - author_email='drtuba78@gmail.com', - url='https://github.com/SethMMorton/natsort', - license='MIT', - install_requires=REQUIRES, - packages=['natsort'], - entry_points={'console_scripts': ['natsort = natsort.__main__:main']}, - tests_require=['pytest'], - cmdclass = {'test': PyTest}, - description=DESCRIPTION, - long_description=LONG_DESCRIPTION, - classifiers=( - #'Development Status :: 4 - Beta', +setup( + name='natsort', + version=VERSION, + author='Seth M. Morton', + author_email='drtuba78@gmail.com', + url='https://github.com/SethMMorton/natsort', + license='MIT', + install_requires=REQUIRES, + packages=['natsort'], + entry_points={'console_scripts': ['natsort = natsort.__main__:main']}, + tests_require=['pytest'], + cmdclass={'test': PyTest}, + description=DESCRIPTION, + long_description=LONG_DESCRIPTION, + classifiers=( 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', 'Intended Audience :: Science/Research', @@ -83,5 +83,5 @@ setup(name='natsort', 'Programming Language :: Python :: 3', 'Topic :: Scientific/Engineering :: Information Analysis', 'Topic :: Utilities', - ) - ) + ) +) diff --git a/test_natsort/profile_natsorted.py b/test_natsort/profile_natsorted.py index 5140cc1..802fe5f 100644 --- a/test_natsort/profile_natsorted.py +++ b/test_natsort/profile_natsorted.py @@ -15,10 +15,11 @@ from natsort.py23compat import py23_range # Sample lists to sort nums = random.sample(py23_range(10000), 1000) -nstr = list(map(str,random.sample(py23_range(10000), 1000))) -astr = ['a'+x+'num' for x in map(str,random.sample(py23_range(10000), 1000))] -tstr = [['a'+x, 'a-'+x] for x in map(str,random.sample(py23_range(10000), 1000))] -cstr = ['a'+x+'-'+x for x in map(str,random.sample(py23_range(10000), 1000))] +nstr = list(map(str, random.sample(py23_range(10000), 1000))) +astr = ['a'+x+'num' for x in map(str, random.sample(py23_range(10000), 1000))] +tstr = [['a'+x, 'a-'+x] + for x in map(str, random.sample(py23_range(10000), 
1000))] +cstr = ['a'+x+'-'+x for x in map(str, random.sample(py23_range(10000), 1000))] def prof_nums(a): @@ -110,4 +111,3 @@ def prof_str_unorderable(a): for _ in py23_range(1000): natsorted(a) cProfile.run('prof_str_unorderable(cstr)', sort='time') - diff --git a/test_natsort/stress_natsort.py b/test_natsort/stress_natsort.py index b4394e8..c890e5f 100644 --- a/test_natsort/stress_natsort.py +++ b/test_natsort/stress_natsort.py @@ -2,10 +2,9 @@ """\ This file contains functions to stress-test natsort. """ -import sys -import random -import string -import copy +from random import randint, sample, choice +from string import printable +from copy import copy from pytest import fail from natsort import natsorted from natsort.py23compat import py23_range @@ -17,7 +16,8 @@ def test_random(): # Repeat test 1,000,000 times for _ in py23_range(1000000): # Made a list of five randomly generated strings - lst = [''.join(random.sample(string.printable, random.randint(7, 30))) for __ in py23_range(5)] + lst = [''.join(sample(printable, randint(7, 30))) + for __ in py23_range(5)] # Try to sort. If there is an exception, give some detailed info. try: natsorted(lst) @@ -28,19 +28,22 @@ def test_random(): def test_similar(): - """Try to sort 1,000,000 randomly generated similar strings without exception.""" + """Try to sort 1,000,000 randomly generated + similar strings without exception. 
+ """ # Repeat test 1,000,000 times for _ in py23_range(1000000): # Create a randomly generated string - base = random.sample(string.printable, random.randint(7, 30)) - # Make a list of strings based on this string, with some randomly generated modifications + base = sample(printable, randint(7, 30)) + # Make a list of strings based on this string, + # with some randomly generated modifications lst = [] for __ in py23_range(5): - new_str = copy.copy(base) - for ___ in py23_range(random.randint(1,5)): - new_str[random.randint(0,len(base)-1)] = random.choice(string.printable) - lst.append(''.join(new_str)) + new_str = copy(base) + for ___ in py23_range(randint(1, 5)): + new_str[randint(0, len(base)-1)] = choice(printable) + lst.append(''.join(new_str)) # Try to sort. If there is an exception, give some detailed info. try: natsorted(lst) @@ -48,4 +51,3 @@ def test_similar(): msg = "Ended with exception type '{exc}: {msg}'.\n" msg += "Failed on the input {lst}." fail(msg.format(exc=type(e).__name__, msg=str(e), lst=str(lst))) - diff --git a/test_natsort/test_main.py b/test_natsort/test_main.py index d236d2d..8157c3e 100644 --- a/test_natsort/test_main.py +++ b/test_natsort/test_main.py @@ -72,7 +72,8 @@ num-6 num-2 """ - # Exclude the number 1 and 6. Both are present because we use digits/versions. + # Exclude the number 1 and 6. + # Both are present because we use digits/versions. sys.argv[1:] = ['-t', 'ver', '-e', '1', '-e', '6', 'num-2', 'num-6', 'num-1'] main() @@ -109,7 +110,8 @@ a1.0e3 """ # Include two ranges. 
- sys.argv[1:] = ['-f', '1', '10', '-f', '400', '500', 'a1.0e3', 'a5.3', 'a453.6'] + sys.argv[1:] = ['-f', '1', '10', '-f', '400', '500', + 'a1.0e3', 'a5.3', 'a453.6'] main() out, __ = capsys.readouterr() assert out == """\ @@ -135,7 +137,7 @@ a453.6 /Folder (1)/ /Folder (10)/ /Folder/ -""" +""" sys.argv[1:] = ['--paths', '/Folder (1)/', '/Folder/', '/Folder (10)/'] main() out, __ = capsys.readouterr() @@ -147,14 +149,14 @@ a453.6 def test_range_check(): - + # Floats are always returned assert range_check(10, 11) == (10.0, 11.0) assert range_check(6.4, 30) == (6.4, 30.0) # Invalid ranges give a ValueErro with raises(ValueError) as err: - range_check(7, 2) + range_check(7, 2) assert str(err.value) == 'low >= high' @@ -192,7 +194,7 @@ def test_exclude_entry(): def test_sort_and_print_entries(capsys): - + class Args: """A dummy class to simulate the argparse Namespace object""" def __init__(self, filter, reverse_filter, exclude, as_path, reverse): @@ -207,8 +209,8 @@ def test_sort_and_print_entries(capsys): entries = ['tmp/a57/path2', 'tmp/a23/path1', - 'tmp/a1/path1', - 'tmp/a1 (1)/path1', + 'tmp/a1/path1', + 'tmp/a1 (1)/path1', 'tmp/a130/path1', 'tmp/a64/path1', 'tmp/a64/path2'] @@ -226,7 +228,8 @@ tmp/a64/path2 tmp/a130/path1 """ - # You would use --paths to make them sort as paths when the OS makes duplicates + # You would use --paths to make them sort + # as paths when the OS makes duplicates sort_and_print_entries(entries, Args(None, None, False, True, False)) out, __ = capsys.readouterr() assert out == """\ @@ -240,7 +243,8 @@ tmp/a130/path1 """ # Sort the paths with numbers between 20-100 - sort_and_print_entries(entries, Args([(20, 100)], None, False, False, False)) + sort_and_print_entries(entries, Args([(20, 100)], None, False, + False, False)) out, __ = capsys.readouterr() assert out == """\ tmp/a23/path1 @@ -250,7 +254,8 @@ tmp/a64/path2 """ # Sort the paths without numbers between 20-100 - sort_and_print_entries(entries, Args(None, [(20, 100)], False, 
True, False)) + sort_and_print_entries(entries, Args(None, [(20, 100)], False, + True, False)) out, __ = capsys.readouterr() assert out == """\ tmp/a1/path1 diff --git a/test_natsort/test_natsort.py b/test_natsort/test_natsort.py index 1afb88a..0eeed12 100644 --- a/test_natsort/test_natsort.py +++ b/test_natsort/test_natsort.py @@ -176,32 +176,36 @@ def test_natsorted(): # natsort will recursively descend into lists of lists so you can # sort by the sublist contents. data = [['a1', 'a5'], ['a1', 'a40'], ['a10', 'a1'], ['a2', 'a5']] - assert natsorted(data) == [['a1', 'a5'], ['a1', 'a40'], ['a2', 'a5'], ['a10', 'a1']] + assert natsorted(data) == [['a1', 'a5'], ['a1', 'a40'], + ['a2', 'a5'], ['a10', 'a1']] # You can pass a key to do non-standard sorting rules b = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')] - assert natsorted(b, key=itemgetter(1)) == [('c', 'num2'), ('a', 'num3'), ('b', 'num5')] + c = [('c', 'num2'), ('a', 'num3'), ('b', 'num5')] + assert natsorted(b, key=itemgetter(1)) == c # Reversing the order is allowed a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] - assert natsorted(a, reverse=True) == ['a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.'][::-1] + b = ['a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.'] + assert natsorted(a, reverse=True) == b[::-1] # Sorting paths just got easier a = ['/p/Folder (10)/file.tar.gz', '/p/Folder/file.tar.gz', '/p/Folder (1)/file (1).tar.gz', - '/p/Folder (1)/file.tar.gz',] + '/p/Folder (1)/file.tar.gz'] assert natsorted(a) == ['/p/Folder (1)/file (1).tar.gz', '/p/Folder (1)/file.tar.gz', '/p/Folder (10)/file.tar.gz', - '/p/Folder/file.tar.gz',] + '/p/Folder/file.tar.gz'] assert natsorted(a, as_path=True) == ['/p/Folder/file.tar.gz', '/p/Folder (1)/file.tar.gz', '/p/Folder (1)/file (1).tar.gz', - '/p/Folder (10)/file.tar.gz',] + '/p/Folder (10)/file.tar.gz'] # You can sort paths and numbers, not that you'd want to - assert natsorted(['/Folder (9)/file.exe', 43], as_path=True) == [43, 
'/Folder (9)/file.exe'] + a = ['/Folder (9)/file.exe', 43] + assert natsorted(a, as_path=True) == [43, '/Folder (9)/file.exe'] def test_versorted(): @@ -209,22 +213,24 @@ def test_versorted(): a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] assert versorted(a) == natsorted(a, number_type=None) assert versorted(a, reverse=True) == versorted(a)[::-1] - a = [('a', '1.9.9a'), ('a', '1.11'), ('a', '1.9.9b'), ('a', '1.11.4'), ('a', '1.10.1')] - assert versorted(a) == [('a', '1.9.9a'), ('a', '1.9.9b'), ('a', '1.10.1'), ('a', '1.11'), ('a', '1.11.4')] + a = [('a', '1.9.9a'), ('a', '1.11'), ('a', '1.9.9b'), + ('a', '1.11.4'), ('a', '1.10.1')] + assert versorted(a) == [('a', '1.9.9a'), ('a', '1.9.9b'), ('a', '1.10.1'), + ('a', '1.11'), ('a', '1.11.4')] # Sorting paths just got easier a = ['/p/Folder (10)/file1.1.0.tar.gz', '/p/Folder/file1.1.0.tar.gz', '/p/Folder (1)/file1.1.0 (1).tar.gz', - '/p/Folder (1)/file1.1.0.tar.gz',] + '/p/Folder (1)/file1.1.0.tar.gz'] assert versorted(a) == ['/p/Folder (1)/file1.1.0 (1).tar.gz', '/p/Folder (1)/file1.1.0.tar.gz', '/p/Folder (10)/file1.1.0.tar.gz', - '/p/Folder/file1.1.0.tar.gz',] + '/p/Folder/file1.1.0.tar.gz'] assert versorted(a, as_path=True) == ['/p/Folder/file1.1.0.tar.gz', '/p/Folder (1)/file1.1.0.tar.gz', '/p/Folder (1)/file1.1.0 (1).tar.gz', - '/p/Folder (10)/file1.1.0.tar.gz',] + '/p/Folder (10)/file1.1.0.tar.gz'] def test_index_natsorted(): @@ -253,7 +259,7 @@ def test_index_natsorted(): # It can sort paths too a = ['/p/Folder (10)/', '/p/Folder/', - '/p/Folder (1)/',] + '/p/Folder (1)/'] assert index_natsorted(a, as_path=True) == [1, 2, 0] @@ -262,14 +268,15 @@ def test_index_versorted(): a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] assert index_versorted(a) == index_natsorted(a, number_type=None) assert index_versorted(a, reverse=True) == index_versorted(a)[::-1] - a = [('a', '1.9.9a'), ('a', '1.11'), ('a', '1.9.9b'), ('a', '1.11.4'), ('a', '1.10.1')] + a = [('a', '1.9.9a'), ('a', '1.11'), ('a', '1.9.9b'), + 
('a', '1.11.4'), ('a', '1.10.1')] assert index_versorted(a) == [0, 2, 4, 1, 3] # It can sort paths too a = ['/p/Folder (10)/file1.1.0.tar.gz', '/p/Folder/file1.1.0.tar.gz', '/p/Folder (1)/file1.1.0 (1).tar.gz', - '/p/Folder (1)/file1.1.0.tar.gz',] + '/p/Folder (1)/file1.1.0.tar.gz'] assert index_versorted(a, as_path=True) == [1, 3, 2, 0] @@ -282,5 +289,3 @@ def test_order_by_index(): assert order_by_index(a, index) == [a[i] for i in index] assert order_by_index(a, index, True) != [a[i] for i in index] assert list(order_by_index(a, index, True)) == [a[i] for i in index] - - -- cgit v1.2.1 From 36867fffe933e14a20aa41d3e030c7dd84034438 Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Sat, 19 Jul 2014 00:02:59 -0700 Subject: 'python setup.py test' now does coverage and analysis. This was done by adding --cov, --flakes, and --pep8 to the pytest call within setup.py. --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index e663cd1..e534448 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ class PyTest(TestCommand): def run_tests(self): #import here, cause outside the eggs aren't loaded import pytest - err1 = pytest.main([]) + err1 = pytest.main(['--cov', 'natsort', '--flakes', '--pep8']) err2 = pytest.main(['--doctest-modules', 'natsort']) err3 = pytest.main(['README.rst']) return err1 | err2 | err3 @@ -65,7 +65,7 @@ setup( install_requires=REQUIRES, packages=['natsort'], entry_points={'console_scripts': ['natsort = natsort.__main__:main']}, - tests_require=['pytest'], + tests_require=['pytest', 'pytest-pep8', 'pytest-flakes', 'pytest-cov'], cmdclass={'test': PyTest}, description=DESCRIPTION, long_description=LONG_DESCRIPTION, -- cgit v1.2.1 From db66506c32206077c2826a8e9004e3a97f5d1b9f Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Sat, 19 Jul 2014 00:00:49 -0700 Subject: Added expanded documentation. This documentation is in reST format, in a docs folder. 
The README has been pruned to only include what is needed to notify the user of what the package does, and directs them to the documentation. The tests and .travis.yml have been updated to account for the documentation. --- .travis.yml | 6 +- README.rst | 520 +++------------------------- docs/source/api.rst | 18 + docs/source/changelog.rst | 154 ++++++++ docs/source/conf.py | 280 +++++++++++++++ docs/source/examples.rst | 150 ++++++++ docs/source/index.rst | 27 ++ docs/source/index_natsorted.rst | 8 + docs/source/index_versorted.rst | 8 + docs/source/intro.rst | 116 +++++++ docs/source/natsort_key.rst | 8 + docs/source/natsort_keygen.rst | 8 + docs/source/natsorted.rst | 8 + docs/source/order_by_index.rst | 8 + docs/source/shell.rst | 137 ++++++++ docs/source/solar/NEWS.txt | 32 ++ docs/source/solar/README.rst | 28 ++ docs/source/solar/layout.html | 32 ++ docs/source/solar/static/solar.css | 344 ++++++++++++++++++ docs/source/solar/static/solarized-dark.css | 84 +++++ docs/source/solar/static/subtle_dots.png | Bin 0 -> 1025 bytes docs/source/solar/theme.conf | 4 + docs/source/versorted.rst | 8 + natsort/natsort.py | 23 +- setup.cfg | 2 + setup.py | 4 +- 26 files changed, 1541 insertions(+), 476 deletions(-) create mode 100644 docs/source/api.rst create mode 100644 docs/source/changelog.rst create mode 100644 docs/source/conf.py create mode 100644 docs/source/examples.rst create mode 100644 docs/source/index.rst create mode 100644 docs/source/index_natsorted.rst create mode 100644 docs/source/index_versorted.rst create mode 100644 docs/source/intro.rst create mode 100644 docs/source/natsort_key.rst create mode 100644 docs/source/natsort_keygen.rst create mode 100644 docs/source/natsorted.rst create mode 100644 docs/source/order_by_index.rst create mode 100644 docs/source/shell.rst create mode 100644 docs/source/solar/NEWS.txt create mode 100644 docs/source/solar/README.rst create mode 100644 docs/source/solar/layout.html create mode 100644 
docs/source/solar/static/solar.css create mode 100644 docs/source/solar/static/solarized-dark.css create mode 100644 docs/source/solar/static/subtle_dots.png create mode 100644 docs/source/solar/theme.conf create mode 100644 docs/source/versorted.rst diff --git a/.travis.yml b/.travis.yml index c6f70f9..6b96f68 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,14 +9,17 @@ install: - pip install pytest-cov pytest-flakes pytest-pep8 - pip install coveralls - pip install wheel +- pip install Sphinx numpydoc - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install argparse; fi script: - python -m pytest --cov natsort --flakes --pep8 - python -m pytest --doctest-modules natsort -- python -m pytest README.rst +- python -m pytest README.rst docs/source/intro.rst docs/source/examples.rst - python -m pytest test_natsort/stress_natsort.py after_success: coveralls +before_deploy: +- python setup.py build_sphinx deploy: provider: pypi user: SethMMorton @@ -26,3 +29,4 @@ deploy: tags: true repo: SethMMorton/natsort distributions: "sdist bdist_wheel" + docs_dir: build/sphinx/html diff --git a/README.rst b/README.rst index e88944a..973e442 100644 --- a/README.rst +++ b/README.rst @@ -7,14 +7,10 @@ natsort .. image:: https://coveralls.io/repos/SethMMorton/natsort/badge.png?branch=develop :target: https://coveralls.io/r/SethMMorton/natsort?branch=develop -Natural sorting for python. ``natsort`` requires python version 2.6 or greater -(this includes python 3.x). To run version 2.6, 3.0, or 3.1 the -`argparse `_ module is required. +Natural sorting for python. Check out the source code at +https://github.com/SethMMorton/natsort. -``natsort`` comes with a shell script that is described below. You can -also execute ``natsort`` from the command line with ``python -m natsort``. 
- -Problem Statement +Quick Description ----------------- When you try to sort a list of strings that contain numbers, the normal python @@ -27,28 +23,30 @@ expect:: Notice that it has the order ('1', '10', '2') - this is because the list is being sorted in lexicographical order, which sorts numbers like you would -letters (i.e. 'a', 'at', 'b'). It would be better if you had a sorting -algorithm that recognized numbers as numbers and treated them like numbers, -not letters. - -This is where ``natsort`` comes in: it provides a key that helps sort lists -"naturally". It provides support for ints and floats (including negatives and -exponential notation), and also a function specifically for sorting version -numbers. +letters (i.e. 'b', 'ba', 'c'). -Synopsis --------- - -Using ``natsort`` is simple:: +``natsort`` provides a function ``natsorted`` that helps sort lists "naturally", +either as real numbers (i.e. signed/unsigned floats or ints), or as versions. +Using ``natsorted`` is simple:: >>> from natsort import natsorted >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] >>> natsorted(a) ['a1', 'a2', 'a4', 'a9', 'a10'] -``natsort`` identifies the numbers and sorts them separately from strings. +``natsorted`` identifies real numbers anywhere in a string and sorts them +naturally. 
+ +Sorting version numbers is just as easy:: + + >>> from natsort import versorted + >>> a = ['version-1.9', 'version-2.0', 'version-1.11', 'version-1.10'] + >>> versorted(a) + ['version-1.9', 'version-1.10', 'version-1.11', 'version-2.0'] + >>> natsorted(a) # natsorted tries to sort as signed floats, so it won't work + ['version-2.0', 'version-1.9', 'version-1.11', 'version-1.10'] -You can also mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types +You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types when you sort:: >>> a = ['4.5', 6, 2.0, '5', 'a'] @@ -57,352 +55,44 @@ when you sort:: >>> # On Python 2, sorted(a) would return [2.0, 6, '4.5', '5', 'a'] >>> # On Python 3, sorted(a) would raise an "unorderable types" TypeError -The natsort algorithm will recursively descend into lists of lists so you can sort by -the sublist contents:: - - >>> data = [['a1', 'a5'], ['a1', 'a40'], ['a10', 'a1'], ['a2', 'a5']] - >>> sorted(data) - [['a1', 'a40'], ['a1', 'a5'], ['a10', 'a1'], ['a2', 'a5']] - >>> natsorted(data) - [['a1', 'a5'], ['a1', 'a40'], ['a2', 'a5'], ['a10', 'a1']] - -There is also a special convenience function provided that is best for sorting -version numbers:: - - >>> from natsort import versorted - >>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1'] - >>> versorted(a) - ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] - -The Sorting Algorithms -'''''''''''''''''''''' - -Sometimes you want to sort by floats, sometimes by ints, and sometimes simply -by digits. ``natsort`` supports all three number types. They can be chosen -with the ``number_type`` argument to ``natsorted``. - -Sort by floats -++++++++++++++ - -By default, ``natsort`` searches for floats (even in exponential -notation!). 
This means that it will look for things like negative -signs and decimal points when determining a number:: - - >>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300'] - >>> sorted(a) - ['a5.034e1', 'a50', 'a50.300', 'a50.4', 'a51.'] - >>> natsorted(a, number_type=float) - ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.'] - >>> natsorted(a) # Float is the default behavior - ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.'] - -Sort by ints -++++++++++++ - -In some cases you don't want ``natsort`` to identify your numbers as floats, -particularly if you are sorting version numbers. This is because you want the -version '1.10' to come after '1.2', not before. In that case, it is advantageous -to sort by ints, not floats:: - - >>> a = ['ver1.9.9a', 'ver1.11', 'ver1.9.9b', 'ver1.11.4', 'ver1.10.1'] - >>> sorted(a) - ['ver1.10.1', 'ver1.11', 'ver1.11.4', 'ver1.9.9a', 'ver1.9.9b'] - >>> natsorted(a) - ['ver1.10.1', 'ver1.11', 'ver1.11.4', 'ver1.9.9a', 'ver1.9.9b'] - >>> natsorted(a, number_type=int) - ['ver1.9.9a', 'ver1.9.9b', 'ver1.10.1', 'ver1.11', 'ver1.11.4'] - -Sort by digits (best for version numbers) -+++++++++++++++++++++++++++++++++++++++++ - -The only difference between sorting by ints and sorting by digits is that -sorting by ints may take into account a negative sign, and sorting by digits -will not. This may be an issue if you used a '-' as your separator before the -version numbers. 
Essentially this is a shortcut for a number type of ``int`` -and the ``signed`` option of ``False``:: - - >>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1'] - >>> natsorted(a, number_type=int) - ['ver-2.9.9a', 'ver-2.9.9b', 'ver-1.10.1', 'ver-1.11', 'ver-1.11.4'] - >>> natsorted(a, number_type=None) - ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] - -The ``versorted`` function is simply a wrapper for ``number_type=None``, -and if you need to sort just version numbers it is best to use the -``versorted`` function for clarity:: - - >>> natsorted(a, number_type=None) == versorted(a) - True - -Using a sorting key -''''''''''''''''''' - -Like the built-in ``sorted`` function, ``natsorted`` can accept a key so that -you can sort based on a particular item of a list or by an attribute of a class:: - - >>> from operator import attrgetter, itemgetter - >>> a = [['num4', 'b'], ['num8', 'c'], ['num2', 'a']] - >>> natsorted(a, key=itemgetter(0)) - [['num2', 'a'], ['num4', 'b'], ['num8', 'c']] - >>> class Foo: - ... def __init__(self, bar): - ... self.bar = bar - ... def __repr__(self): - ... return "Foo('{0}')".format(self.bar) - >>> b = [Foo('num3'), Foo('num5'), Foo('num2')] - >>> natsorted(b, key=attrgetter('bar')) - [Foo('num2'), Foo('num3'), Foo('num5')] - -API ---- - -The ``natsort`` package provides five functions: ``natsort_key``, -``natsorted``, ``versorted``, ``index_natsorted``, and ``index_versorted``. -You can look at the unit tests to see more thorough examples of how -``natsort`` can be used. - -natsorted -''''''''' - -``natsort.natsorted`` (*sequence*, *key* = ``lambda x: x``, *number_type* = ``float``, *signed* = ``True``, *exp* = ``True``) - - sequence (*iterable*) - The sequence to sort. - - key (*function*) - A key used to determine how to sort each element of the sequence. 
- - number_type (``None``, ``float``, ``int``) - The types of number to sort by: ``float`` searches for floating point numbers, - ``int`` searches for integers, and ``None`` searches for digits (like integers - but does not take into account negative sign). ``None`` is a shortcut for - ``number_type = int`` and ``signed = False``. - - signed (``True``, ``False``) - By default a '+' or '-' before a number is taken to be the sign of the number. - If ``signed`` is ``False``, any '+' or '-' will not be considered to be part - of the number, but as part of the string. - - exp (``True``, ``False``) - This option only applies to ``number_type = float``. If ``exp = True``, a string - like ``"3.5e5"`` will be interpreted as ``350000``, i.e. the exponential part - is considered to be part of the number. If ``exp = False``, ``"3.5e5"`` is - interpreted as ``(3.5, "e", 5)``. The default behavior is ``exp = True``. - - returns - The sorted sequence. - -Use ``natsorted`` just like the builtin ``sorted``:: - - >>> from natsort import natsorted - >>> a = ['num3', 'num5', 'num2'] - >>> natsorted(a) - ['num2', 'num3', 'num5'] - -versorted -''''''''' - -``natsort.versorted`` (*sequence*, *key* = ``lambda x: x``) - - sequence (*iterable*) - The sequence to sort. - - key (*function*) - A key used to determine how to sort each element of the sequence. - - returns - The sorted sequence. - -Use ``versorted`` just like the builtin ``sorted``:: - - >>> from natsort import versorted - >>> a = ['num4.0.2', 'num3.4.1', 'num3.4.2'] - >>> versorted(a) - ['num3.4.1', 'num3.4.2', 'num4.0.2'] - -This is a wrapper around ``natsorted(seq, number_type=None)``, and is used -to easily sort version numbers. - -index_natsorted -''''''''''''''' - -``natsort.index_natsorted`` (*sequence*, *key* = ``lambda x: x``, *number_type* = ``float``, *signed* = ``True``, *exp* = ``True``) - - sequence (*iterable*) - The sequence to sort. 
- - key (*function*) - A key used to determine how to sort each element of the sequence. - - number_type (``None``, ``float``, ``int``) - The types of number to sort on: ``float`` searches for floating point numbers, - ``int`` searches for integers, and ``None`` searches for digits (like integers - but does not take into account negative sign). ``None`` is a shortcut for - ``number_type = int`` and ``signed = False``. - - signed (``True``, ``False``) - By default a '+' or '-' before a number is taken to be the sign of the number. - If ``signed`` is ``False``, any '+' or '-' will not be considered to be part - of the number, but as part part of the string. +The natsort algorithm does other fancy things like - exp (``True``, ``False``) - This option only applies to ``number_type = float``. If ``exp = True``, a string - like ``"3.5e5"`` will be interpreted as ``350000``, i.e. the exponential part - is considered to be part of the number. If ``exp = False``, ``"3.5e5"`` is - interpreted as ``(3.5, "e", 5)``. The default behavior is ``exp = True``. + - recursively descend into lists of lists + - sort file paths correctly + - allow custom sorting keys + - expose a natsort_key generator to pass to list.sort - returns - The ordered indexes of the sequence. +Please see the package documentation for more details, including additional examples +and recipes. -Use ``index_natsorted`` if you want to sort multiple lists by the sort order of -one list:: - - >>> from natsort import index_natsorted - >>> a = ['num3', 'num5', 'num2'] - >>> b = ['foo', 'bar', 'baz'] - >>> index = index_natsorted(a) - >>> index - [2, 0, 1] - >>> # Sort both lists by the sort order of a - >>> [a[i] for i in index] - ['num2', 'num3', 'num5'] - >>> [b[i] for i in index] - ['baz', 'foo', 'bar'] - -index_versorted -''''''''''''''' - -``natsort.index_versorted`` (*sequence*, *key* = ``lambda x: x``) - - sequence (*iterable*) - The sequence to sort. 
- - key (*function*) - A key used to determine how to sort each element of the sequence. - - returns - The ordered indexes of the sequence. - -Use ``index_versorted`` just like the builtin sorted:: - - >>> from natsort import index_versorted - >>> a = ['num4.0.2', 'num3.4.1', 'num3.4.2'] - >>> index_versorted(a) - [1, 2, 0] - -This is a wrapper around ``index_natsorted(seq, number_type=None)``, and is used -to easily sort version numbers by their indexes. - -natsort_key -''''''''''' - -``natsort.natsort_key`` (value, *number_type* = ``float``, *signed* = ``True``, *exp* = ``True``, *py3_safe* = ``False``) - - value - The value used by the sorting algorithm - - number_type (``None``, ``float``, ``int``) - The types of number to sort on: ``float`` searches for floating point numbers, - ``int`` searches for integers, and ``None`` searches for digits (like integers - but does not take into account negative sign). ``None`` is a shortcut for - ``number_type = int`` and ``signed = False``. - - signed (``True``, ``False``) - By default a '+' or '-' before a number is taken to be the sign of the number. - If ``signed`` is ``False``, any '+' or '-' will not be considered to be part - of the number, but as part part of the string. - - exp (``True``, ``False``) - This option only applies to ``number_type = float``. If ``exp = True``, a string - like ``"3.5e5"`` will be interpreted as ``350000``, i.e. the exponential part - is considered to be part of the number. If ``exp = False``, ``"3.5e5"`` is - interpreted as ``(3.5, "e", 5)``. The default behavior is ``exp = True``. - - py3_safe (``True``, ``False``) - This will make the string parsing algorithm be more careful by placing - an empty string between two adjacent numbers after the parsing algorithm. - This will prevent the "unorderable types" error. - - returns - The modified value with numbers extracted. 
- -Using ``natsort_key`` is just like any other sorting key in python:: - - >>> from natsort import natsort_key - >>> a = ['num3', 'num5', 'num2'] - >>> a.sort(key=natsort_key) - >>> a - ['num2', 'num3', 'num5'] - -It works by separating out the numbers from the strings:: - - >>> natsort_key('num2') - ('num', 2.0) - -If you need to call ``natsort_key`` with the ``number_type`` argument, or get a special -attribute or item of each element of the sequence, the easiest way is to make a -``lambda`` expression that calls ``natsort_key``:: - - >>> from operator import itemgetter - >>> a = [['num4', 'b'], ['num8', 'c'], ['num2', 'a']] - >>> f = itemgetter(0) - >>> a.sort(key=lambda x: natsort_key(f(x), number_type=int)) - >>> a - [['num2', 'a'], ['num4', 'b'], ['num8', 'c']] - -Shell Script +Shell script ------------ -For your convenience, there is a ``natsort`` shell script supplied to you that -allows you to call ``natsort`` from the command-line. ``natsort`` was written to -aid in computational chemistry research so that it would be easy to analyze -large sets of output files named after the parameter used:: - - $ ls *.out - mode1000.35.out mode1243.34.out mode744.43.out mode943.54.out +``natsort`` comes with a shell script called ``natsort``, or can also be called +from the command line with ``python -m natsort``. The command line script is +only installed onto your ``PATH`` if you don't install via a wheel. There is +apparently a known bug with the wheel installation process that will not create +entry points. -(Obviously, in reality there would be more files, but you get the idea.) Notice -that the shell sorts in lexicographical order. This is the behavior of programs like -``find`` as well as ``ls``. The problem is in passing these files to an -analysis program that causes them not to appear in numerical order, which can lead -to bad analysis. 
To remedy this, use ``natsort``:: - - # This won't get you what you want - $ foo *.out - # This will sort naturally - $ natsort *.out - mode744.43.out - mode943.54.out - mode1000.35.out - mode1243.34.out - $ natsort *.out | xargs foo - -You can also filter out numbers using the ``natsort`` command-line script:: - - $ natsort *.out -f 900 1100 # Select only numbers between 900-1100 - mode943.54.out - mode1000.35.out - -If needed, you can exclude specific numbers:: - - $ natsort *.out -e 1000.35 # Exclude 1000.35 from search - mode744.43.out - mode943.54.out - mode1243.34.out - -For other options, use ``natsort --help``. In general, the other options mirror -the ``natsorted`` API. +Requirements +------------ -It is also helpful to note that ``natsort`` accepts pipes. +``natsort`` requires python version 2.6 or greater +(this includes python 3.x). To run version 2.6, 3.0, or 3.1 the +`argparse `_ module is required. -Note to users of the ``natsort`` shell script from < v. 3.1.0 -''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' +Depreciation Notices +-------------------- -The ``natsort`` shell script options and implementation for version 3.1.0 has -changed slightly. Options relating to interpreting input as file or directory -paths have been removed, and internally the input is no longer treated as file -paths. In most situations, this should not give different results, but in -some unique cases it may. Feel free to contact me if this ruins your work flow. + - In ``natsort`` version 4.0.0, the ``natsort_key`` function will be removed + from the public API. All future development should use ``natsort_keygen`` + in preparation for this. + - In ``natsort`` version 3.1.0, the shell script changed how it interpreted + input; previously, all input was assumed to be a filepath, but as of 3.1.0 + input is just treated as a string. For most cases the results are the same. 
+ + - As of ``natsort`` version 3.4.0, a ``--path`` option has been added to + force the shell script to interpret the input as filepaths. Author ------ @@ -412,6 +102,9 @@ Seth M. Morton History ------- +These are the last three entries of the changelog. See the package documentation +for the complete changelog. + XX-XX-2014 v. 3.4.0 ''''''''''''''''''' @@ -420,7 +113,7 @@ XX-XX-2014 v. 3.4.0 - Added a 'natsort_keygen' function that will generate a wrapped version of 'natsort_key' that is easier to call. 'natsort_key' is now set to depreciate at natsort version 4.0.0. - - Added an 'as_path' option to 'natsorted' and co. that will try to treat + - Added an 'as_path' option to 'natsorted' & co. that will try to treat input strings as filepaths. This will help yield correct results for OS-generated inputs like ``['/p/q/o.x', '/p/q (1)/o.x', '/p/q (10)/o.x', '/p/q/o (1).x']``. @@ -428,17 +121,18 @@ XX-XX-2014 v. 3.4.0 of reduction in speed for numeric input (~2.0x). - This is a good compromise because the most common input will be strings, - not numbers. If you are sorting only numbers, you would use 'sorted'. - - Sorting numbers still only takes 0.6x the time of sorting strings. + not numbers, and sorting numbers still only takes 0.6x the time of sorting + strings. If you are sorting only numbers, you would use 'sorted' anyway. - Added the 'order_by_index' function to help in using the output of 'index_natsorted' and 'index_versorted'. - - Added the 'reverse' option to 'natsorted' and co. to make it's API more + - Added the 'reverse' option to 'natsorted' & co. to make its API more similar to the builtin 'sorted'. - Added more unit tests. - - Added auxillary test code that helps in profiling and stress-testing. + - Added auxiliary test code that helps in profiling and stress-testing. - Reworked the documentation, moving most of it to PyPI's hosting platform. - Added support for coveralls.io. + - Entire codebase is now PyFlakes and PEP8 compliant. 06-28-2014 v. 
3.3.0 ''''''''''''''''''' @@ -460,103 +154,3 @@ XX-XX-2014 v. 3.4.0 - Re-"Fixed" unorderable types issue on Python 3.x - this workaround is for when the problem occurs in the middle of the string. - -05-07-2014 v. 3.2.0 -''''''''''''''''''' - - - "Fixed" unorderable types issue on Python 3.x with a workaround that - attempts to replicate the Python 2.x behavior by putting all the numbers - (or strings that begin with numbers) first. - - Now explicitly excluding __pycache__ from releases by adding a prune statement - to MANIFEST.in. - -05-05-2014 v. 3.1.2 -''''''''''''''''''' - - - Added setup.cfg to support universal wheels. - - Added Python 3.0 and Python 3.1 as requiring the argparse module. - -03-01-2014 v. 3.1.1 -''''''''''''''''''' - - - Added ability to sort lists of lists. - - Cleaned up import statements. - -01-20-2014 v. 3.1.0 -''''''''''''''''''' - - - Added the ``signed`` and ``exp`` options to allow finer tuning of the sorting - - Entire codebase now works for both Python 2 and Python 3 without needing to run - ``2to3``. - - Updated all doctests. - - Further simplified the ``natsort`` base code by removing unneeded functions. - - Simplified documentation where possible. - - Improved the shell script code - - - Made the documentation less "path"-centric to make it clear it is not just - for sorting file paths. - - Removed the filesystem-based options because these can be achieved better - though a pipeline. - - Added doctests. - - Added new options that correspond to ``signed`` and ``exp``. - - The user can now specify multiple numbers to exclude or multiple ranges - to filter by. - -10-01-2013 v. 3.0.2 -''''''''''''''''''' - - - Made float, int, and digit searching algorithms all share the same base function. - - Fixed some outdated comments. - - Made the ``__version__`` variable available when importing the module. - -8-15-2013 v. 3.0.1 -'''''''''''''''''' - - - Added support for unicode strings. - - Removed extraneous ``string2int`` function. 
- - Fixed empty string removal function. - -7-13-2013 v. 3.0.0 -'''''''''''''''''' - - - Added a ``number_type`` argument to the sorting functions to specify how - liberal to be when deciding what a number is. - - Reworked the documentation. - -6-25-2013 v. 2.2.0 -'''''''''''''''''' - - - Added ``key`` attribute to ``natsorted`` and ``index_natsorted`` so that - it mimics the functionality of the built-in ``sorted`` - - Added tests to reflect the new functionality, as well as tests demonstrating - how to get similar functionality using ``natsort_key``. - -12-5-2012 v. 2.1.0 -'''''''''''''''''' - - - Reorganized package. - - Now using a platform independent shell script generator (entry_points - from distribute). - - Can now execute natsort from command line with ``python -m natsort`` - as well. - -11-30-2012 v. 2.0.2 -''''''''''''''''''' - - - Added the use_2to3 option to setup.py. - - Added distribute_setup.py to the distribution. - - Added dependency to the argparse module (for python2.6). - -11-21-2012 v. 2.0.1 -''''''''''''''''''' - - - Reorganized directory structure. - - Added tests into the natsort.py file iteself. - -11-16-2012, v. 2.0.0 -'''''''''''''''''''' - - - Updated sorting algorithm to support floats (including exponentials) and - basic version number support. - - Added better README documentation. - - Added doctests. diff --git a/docs/source/api.rst b/docs/source/api.rst new file mode 100644 index 0000000..7546de6 --- /dev/null +++ b/docs/source/api.rst @@ -0,0 +1,18 @@ +.. default-domain:: py +.. currentmodule:: natsort + +.. _api: + +natsort API +=========== + +.. toctree:: + :maxdepth: 2 + + natsort_keygen.rst + natsort_key.rst + natsorted.rst + versorted.rst + index_natsorted.rst + index_versorted.rst + order_by_index.rst diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst new file mode 100644 index 0000000..936a92b --- /dev/null +++ b/docs/source/changelog.rst @@ -0,0 +1,154 @@ +.. 
_changelog: + +Changelog +--------- + +XX-XX-2014 v. 3.4.0 +''''''''''''''''''' + + - Fixed a bug that caused user's options to the 'natsort_key' to not be + passed on to recursive calls of 'natsort_key'. + - Added a 'natsort_keygen' function that will generate a wrapped version + of 'natsort_key' that is easier to call. 'natsort_key' is now set to + be deprecated at natsort version 4.0.0. + - Added an 'as_path' option to 'natsorted' & co. that will try to treat + input strings as filepaths. This will help yield correct results for + OS-generated inputs like + ``['/p/q/o.x', '/p/q (1)/o.x', '/p/q (10)/o.x', '/p/q/o (1).x']``. + - Massive performance enhancements for string input (1.8x-2.0x), at the expense + of reduction in speed for numeric input (~2.0x). + + - This is a good compromise because the most common input will be strings, + not numbers, and sorting numbers still only takes 0.6x the time of sorting + strings. If you are sorting only numbers, you would use 'sorted' anyway. + + - Added the 'order_by_index' function to help in using the output of + 'index_natsorted' and 'index_versorted'. + - Added the 'reverse' option to 'natsorted' & co. to make its API more + similar to the builtin 'sorted'. + - Added more unit tests. + - Added auxiliary test code that helps in profiling and stress-testing. + - Reworked the documentation, moving most of it to PyPI's hosting platform. + - Added support for coveralls.io. + - Entire codebase is now PyFlakes and PEP8 compliant. + +06-28-2014 v. 3.3.0 +''''''''''''''''''' + + - Added a 'versorted' method for more convenient sorting of versions. + - Updated command-line tool --number_type option with 'version' and 'ver' + to make it more clear how to sort version numbers. + - Moved unit-testing mechanism from being docstring-based to actual unit tests + in actual functions. + + - This has provided the ability to determine the coverage of the unit tests (99%). + - This also makes the pydoc documentation a bit more clear. 
+ + - Made docstrings for public functions mirror the README API. + - Connected natsort development to Travis-CI to help ensure quality releases. + +06-20-2014 v. 3.2.1 +''''''''''''''''''' + + - Re-"Fixed" unorderable types issue on Python 3.x - this workaround + is for when the problem occurs in the middle of the string. + +05-07-2014 v. 3.2.0 +''''''''''''''''''' + + - "Fixed" unorderable types issue on Python 3.x with a workaround that + attempts to replicate the Python 2.x behavior by putting all the numbers + (or strings that begin with numbers) first. + - Now explicitly excluding __pycache__ from releases by adding a prune statement + to MANIFEST.in. + +05-05-2014 v. 3.1.2 +''''''''''''''''''' + + - Added setup.cfg to support universal wheels. + - Added Python 3.0 and Python 3.1 as requiring the argparse module. + +03-01-2014 v. 3.1.1 +''''''''''''''''''' + + - Added ability to sort lists of lists. + - Cleaned up import statements. + +01-20-2014 v. 3.1.0 +''''''''''''''''''' + + - Added the ``signed`` and ``exp`` options to allow finer tuning of the sorting + - Entire codebase now works for both Python 2 and Python 3 without needing to run + ``2to3``. + - Updated all doctests. + - Further simplified the ``natsort`` base code by removing unneeded functions. + - Simplified documentation where possible. + - Improved the shell script code + + - Made the documentation less "path"-centric to make it clear it is not just + for sorting file paths. + - Removed the filesystem-based options because these can be achieved better + through a pipeline. + - Added doctests. + - Added new options that correspond to ``signed`` and ``exp``. + - The user can now specify multiple numbers to exclude or multiple ranges + to filter by. + +10-01-2013 v. 3.0.2 +''''''''''''''''''' + + - Made float, int, and digit searching algorithms all share the same base function. + - Fixed some outdated comments. + - Made the ``__version__`` variable available when importing the module. 
+ +8-15-2013 v. 3.0.1 +'''''''''''''''''' + + - Added support for unicode strings. + - Removed extraneous ``string2int`` function. + - Fixed empty string removal function. + +7-13-2013 v. 3.0.0 +'''''''''''''''''' + + - Added a ``number_type`` argument to the sorting functions to specify how + liberal to be when deciding what a number is. + - Reworked the documentation. + +6-25-2013 v. 2.2.0 +'''''''''''''''''' + + - Added ``key`` attribute to ``natsorted`` and ``index_natsorted`` so that + it mimics the functionality of the built-in ``sorted`` + - Added tests to reflect the new functionality, as well as tests demonstrating + how to get similar functionality using ``natsort_key``. + +12-5-2012 v. 2.1.0 +'''''''''''''''''' + + - Reorganized package. + - Now using a platform independent shell script generator (entry_points + from distribute). + - Can now execute natsort from command line with ``python -m natsort`` + as well. + +11-30-2012 v. 2.0.2 +''''''''''''''''''' + + - Added the use_2to3 option to setup.py. + - Added distribute_setup.py to the distribution. + - Added dependency to the argparse module (for python2.6). + +11-21-2012 v. 2.0.1 +''''''''''''''''''' + + - Reorganized directory structure. + - Added tests into the natsort.py file itself. + +11-16-2012, v. 2.0.0 +'''''''''''''''''''' + + - Updated sorting algorithm to support floats (including exponentials) and + basic version number support. + - Added better README documentation. + - Added doctests. diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..ee8ea53 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,280 @@ +# -*- coding: utf-8 -*- +# +# natsort documentation build configuration file, created by +# sphinx-quickstart on Thu Jul 17 21:01:29 2014. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. 
+# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import os +import re + +def current_version(): + # Read the _version.py file for the module version number + VERSIONFILE = os.path.join('..', '..', 'natsort', '_version.py') + versionsearch = re.compile(r"^__version__ = ['\"]([^'\"]*)['\"]") + with open(VERSIONFILE, "rt") as fl: + for line in fl: + m = versionsearch.search(line) + if m: + return m.group(1) + else: + s = "Unable to locate version string in {0}" + raise RuntimeError(s.format(VERSIONFILE)) + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.intersphinx', + 'numpydoc', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'natsort' +copyright = u'2014, Seth M. Morton' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The full version, including alpha/beta/rc tags. +release = current_version() +# The short X.Y version. 
+version = '.'.join(release.split('.')[0:2]) + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['solar/*'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' +highlight_language = 'python' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'solar' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +html_theme_path = ['.'] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". 
+#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
+#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'natsortdoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + ('index', 'natsort.tex', u'natsort Documentation', + u'Seth M. Morton', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'natsort', u'natsort Documentation', + [u'Seth M. Morton'], 1) +] + +# If true, show URL addresses after external links. 
+#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'natsort', u'natsort Documentation', + u'Seth M. Morton', 'natsort', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False + + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = {'http://docs.python.org/': None} diff --git a/docs/source/examples.rst b/docs/source/examples.rst new file mode 100644 index 0000000..9704495 --- /dev/null +++ b/docs/source/examples.rst @@ -0,0 +1,150 @@ +.. default-domain:: py +.. currentmodule:: natsort + +.. _examples: + +Examples and Recipes +==================== + +If you want more detailed examples than given on this page, please see +https://github.com/SethMMorton/natsort/tree/master/test_natsort. + +Basic Usage +----------- + +In the most basic use case, simply import :func:`~natsorted` and use +it as you would :func:`sorted`:: + + >>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300'] + >>> sorted(a) + ['a5.034e1', 'a50', 'a50.300', 'a50.4', 'a51.'] + >>> from natsort import natsorted + >>> natsorted(a) + ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.'] + +Customizing Float Definition +---------------------------- + +By default :func:`~natsorted` searches for any float that would be +a valid Python float literal, such as 5, 0.4, -4.78, +4.2E-34, etc. 
+Perhaps you don't want to search for signed numbers, or you don't +want to search for exponential notation, and the ``signed`` and +``exp`` options allow you to do this:: + + >>> a = ['a50', 'a51.', 'a+50.4', 'a5.034e1', 'a+50.300'] + >>> natsorted(a) + ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] + >>> natsorted(a, signed=False) + ['a50', 'a5.034e1', 'a51.', 'a+50.300', 'a+50.4'] + >>> natsorted(a, exp=False) + ['a5.034e1', 'a50', 'a+50.300', 'a+50.4', 'a51.'] + +Sort Version Numbers +-------------------- + +With default options, :func:`~natsorted` will not sort version numbers +well. Version numbers are best sorted by searching for valid unsigned int +literals, not floats. This can be achieved in three ways, as shown below:: + + >>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1'] + >>> natsorted(a) # This gives incorrect results + ['ver-2.9.9a', 'ver-2.9.9b', 'ver-1.11', 'ver-1.11.4', 'ver-1.10.1'] + >>> natsorted(a, number_type=int, signed=False) + ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] + >>> natsorted(a, number_type=None) + ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] + >>> from natsort import versorted + >>> versorted(a) + ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] + +You can see that ``number_type=None`` is a shortcut for ``number_type=int`` +and ``signed=False``, and that :func:`~versorted` is a shortcut for +``natsorted(number_type=None)``. The recommended manner to sort version +numbers is to use :func:`~versorted`. + +Sort OS-Generated Paths +----------------------- + +In some cases when sorting file paths with OS-Generated names, the default +:func:`~natsorted` algorithm may not be sufficient. In cases like these, +you may need to use the ``as_path`` option:: + + >>> a = ['./folder/file (1).txt', + ... './folder/file.txt', + ... './folder (1)/file.txt', + ... 
'./folder (10)/file.txt'] + >>> natsorted(a) + ['./folder (1)/file.txt', './folder (10)/file.txt', './folder/file (1).txt', './folder/file.txt'] + >>> natsorted(a, as_path=True) + ['./folder/file.txt', './folder/file (1).txt', './folder (1)/file.txt', './folder (10)/file.txt'] + +Using a Custom Sorting Key +-------------------------- + +Like the built-in ``sorted`` function, ``natsorted`` can accept a custom +sort key so that:: + + >>> from operator import attrgetter, itemgetter + >>> a = [['a', 'num4'], ['b', 'num8'], ['c', 'num2']] + >>> natsorted(a, key=itemgetter(1)) + [['c', 'num2'], ['a', 'num4'], ['b', 'num8']] + >>> class Foo: + ... def __init__(self, bar): + ... self.bar = bar + ... def __repr__(self): + ... return "Foo('{0}')".format(self.bar) + >>> b = [Foo('num3'), Foo('num5'), Foo('num2')] + >>> natsorted(b, key=attrgetter('bar')) + [Foo('num2'), Foo('num3'), Foo('num5')] + +Generating a Natsort Key +------------------------ + +If you need to sort a list in-place, you cannot use :func:`~natsorted`; you +need to pass a key to the :meth:`list.sort` method. The function +:func:`~natsort_keygen` is a convenient way to generate these keys for you:: + + >>> from natsort import natsort_keygen + >>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300'] + >>> natsort_key = natsort_keygen() + >>> a.sort(key=natsort_key) + >>> a + ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.'] + >>> versort_key = natsort_keygen(number_type=None) + >>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1'] + >>> a.sort(key=versort_key) + >>> a + ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] + +:func:`~natsort_keygen` has the same API as :func:`~natsorted`. + +Sorting Multiple Lists According to a Single List +------------------------------------------------- + +Sometimes you have multiple lists, and you want to sort one of those +lists and reorder the other lists according to how the first was sorted. 
+To achieve this you would use the :func:`~index_natsorted` or +:func:`~index_versorted` in combination with the convenience function +:func:`~order_by_index`:: + + >>> from natsort import index_natsorted, order_by_index + >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] + >>> b = [4, 5, 6, 7, 8] + >>> c = ['hi', 'lo', 'ah', 'do', 'up'] + >>> index = index_natsorted(a) + >>> order_by_index(a, index) + ['a1', 'a2', 'a4', 'a9', 'a10'] + >>> order_by_index(b, index) + [6, 4, 7, 5, 8] + >>> order_by_index(c, index) + ['ah', 'hi', 'do', 'lo', 'up'] + +Returning Results in Reverse Order +---------------------------------- + +Just like the :func:`sorted` built-in function, you can supply the +``reverse`` option to return the results in reverse order:: + + >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] + >>> natsorted(a, reverse=True) + ['a10', 'a9', 'a4', 'a2', 'a1'] diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..a6fd97c --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,27 @@ +.. natsort documentation master file, created by + sphinx-quickstart on Thu Jul 17 21:01:29 2014. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +natsort: Natural Sorting for Python +=================================== + +Contents: + +.. toctree:: + :maxdepth: 2 + :numbered: + + intro.rst + examples.rst + api.rst + shell.rst + changelog.rst + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/source/index_natsorted.rst b/docs/source/index_natsorted.rst new file mode 100644 index 0000000..ea48f25 --- /dev/null +++ b/docs/source/index_natsorted.rst @@ -0,0 +1,8 @@ +.. default-domain:: py +.. currentmodule:: natsort + +:func:`~natsort.index_natsorted` +================================ + +.. 
autofunction:: index_natsorted + diff --git a/docs/source/index_versorted.rst b/docs/source/index_versorted.rst new file mode 100644 index 0000000..07e266f --- /dev/null +++ b/docs/source/index_versorted.rst @@ -0,0 +1,8 @@ +.. default-domain:: py +.. currentmodule:: natsort + +:func:`~natsort.index_versorted` +================================ + +.. autofunction:: index_versorted + diff --git a/docs/source/intro.rst b/docs/source/intro.rst new file mode 100644 index 0000000..d4977e8 --- /dev/null +++ b/docs/source/intro.rst @@ -0,0 +1,116 @@ +.. default-domain:: py +.. module:: natsort + +The :mod:`natsort` module +========================= + +Natural sorting for python. Check out the source code at +https://github.com/SethMMorton/natsort. + +:mod:`natsort` was initially created for sorting scientific output filenames that +contained floating point numbers in the names. There was a serious lack of +algorithms out there that could perform a natural sort on `floats` but +plenty for ints; check out +`this StackOverflow question `_ +and its answers and links therein, +`this ActiveState forum `_, +and of course `this great article on natural sorting `_ +from CodingHorror.com for examples of what I mean. +:mod:`natsort` was created to fill in this gap. It has since grown +and can now sort version numbers (which seems to be the +most common use case based on user feedback) as well as some other nice features. + +Quick Description +----------------- + +When you try to sort a list of strings that contain numbers, the normal python +sort algorithm sorts lexicographically, so you might not get the results that you +expect:: + + >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] + >>> sorted(a) + ['a1', 'a10', 'a2', 'a4', 'a9'] + +Notice that it has the order ('1', '10', '2') - this is because the list is +being sorted in lexicographical order, which sorts numbers like you would +letters (i.e. 'b', 'ba', 'c'). 
+ +:mod:`natsort` provides a function :func:`~natsorted` that helps sort lists +"naturally", either as real numbers (i.e. signed/unsigned floats or ints), +or as versions. Using :func:`~natsorted` is simple:: + + >>> from natsort import natsorted + >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] + >>> natsorted(a) + ['a1', 'a2', 'a4', 'a9', 'a10'] + +:func:`~natsorted` identifies real numbers anywhere in a string and sorts them +naturally. + +Sorting version numbers is just as easy with :func:`~versorted`:: + + >>> from natsort import versorted + >>> a = ['version-1.9', 'version-2.0', 'version-1.11', 'version-1.10'] + >>> versorted(a) + ['version-1.9', 'version-1.10', 'version-1.11', 'version-2.0'] + >>> natsorted(a) # natsorted tries to sort as signed floats, so it won't work + ['version-2.0', 'version-1.9', 'version-1.11', 'version-1.10'] + +You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types +when you sort:: + + >>> a = ['4.5', 6, 2.0, '5', 'a'] + >>> natsorted(a) + [2.0, '4.5', '5', 6, 'a'] + >>> # On Python 2, sorted(a) would return [2.0, 6, '4.5', '5', 'a'] + >>> # On Python 3, sorted(a) would raise an "unorderable types" TypeError + +The natsort algorithm does other fancy things like + + - recursively descend into lists of lists + - sort file paths correctly + - allow custom sorting keys + - expose a natsort_key generator to pass to list.sort + +Please see the :ref:`examples` for a quick start guide, or the :ref:`api` +for more details. + +Installation +------------ + +Installation of :mod:`natsort` is ultra-easy. Simply execute from the +command line:: + + easy_install natsort + +or, if you have ``pip`` (preferred over ``easy_install``):: + + pip install natsort + +Both of the above commands will download the source for you. + +You can also download the source from http://pypi.python.org/pypi/natsort, +or browse the git repository at https://github.com/SethMMorton/natsort. 
+ +If you choose to install from source, you can unzip the source archive and +enter the directory, and type:: + + python setup.py install + +If you wish to run the unit tests, enter:: + + python setup.py test + +If you want to build this documentation, enter:: + + python setup.py build_sphinx + +:mod:`natsort` requires python version 2.6 or greater +(this includes python 3.x). To run version 2.6, 3.0, or 3.1 the +`argparse `_ module is required. + +:mod:`natsort` comes with a shell script called :mod:`natsort`, or can also be called +from the command line with ``python -m natsort``. The command line script is +only installed onto your ``PATH`` if you don't install via a wheel. There is +apparently a known bug with the wheel installation process that will not create +entry points. diff --git a/docs/source/natsort_key.rst b/docs/source/natsort_key.rst new file mode 100644 index 0000000..351b351 --- /dev/null +++ b/docs/source/natsort_key.rst @@ -0,0 +1,8 @@ +.. default-domain:: py +.. currentmodule:: natsort + +:func:`~natsort.natsort_key` +============================ + +.. autofunction:: natsort_key + diff --git a/docs/source/natsort_keygen.rst b/docs/source/natsort_keygen.rst new file mode 100644 index 0000000..b0d5988 --- /dev/null +++ b/docs/source/natsort_keygen.rst @@ -0,0 +1,8 @@ +.. default-domain:: py +.. currentmodule:: natsort + +:func:`~natsort.natsort_keygen` +=============================== + +.. autofunction:: natsort_keygen + diff --git a/docs/source/natsorted.rst b/docs/source/natsorted.rst new file mode 100644 index 0000000..30b5692 --- /dev/null +++ b/docs/source/natsorted.rst @@ -0,0 +1,8 @@ +.. default-domain:: py +.. currentmodule:: natsort + +:func:`~natsort.natsorted` +========================== + +.. autofunction:: natsorted + diff --git a/docs/source/order_by_index.rst b/docs/source/order_by_index.rst new file mode 100644 index 0000000..b1d7681 --- /dev/null +++ b/docs/source/order_by_index.rst @@ -0,0 +1,8 @@ +.. default-domain:: py +.. 
currentmodule:: natsort + +:func:`~natsort.order_by_index` +=============================== + +.. autofunction:: order_by_index + diff --git a/docs/source/shell.rst b/docs/source/shell.rst new file mode 100644 index 0000000..e29a6fe --- /dev/null +++ b/docs/source/shell.rst @@ -0,0 +1,137 @@ +.. default-domain:: py +.. currentmodule:: natsort + +.. _shell: + +Shell Script +============ + +The ``natsort`` shell script is automatically installed when you install +:mod:`natsort` from "zip" or "tar.gz" via ``pip`` or ``easy_install`` +(there is a known bug with wheels that will not install the shell script). + +Below is the usage and some usage examples for the ``natsort`` shell script. + +Usage +----- + +:: + + usage: natsort [-h] [--version] [-p] [-f LOW HIGH] [-F LOW HIGH] + [-e EXCLUDE] [-r] [-t {digit,int,float,version,ver}] + [--nosign] [--noexp] + [entries [entries ...]] + + Performs a natural sort on entries given on the command-line. + A natural sort sorts numerically then alphabetically, and will sort + by numbers in the middle of an entry. + + positional arguments: + entries The entries to sort. Taken from stdin if nothing is + given on the command line. + + optional arguments: + -h, --help show this help message and exit + --version show program's version number and exit + -p, --paths Interpret the input as file paths. This is not + strictly necessary to sort all file paths, but in + cases where there are OS-generated file paths like + "Folder/" and "Folder (1)/", this option is needed to + make the paths sorted in the order you expect + ("Folder/" before "Folder (1)/"). + -f LOW HIGH, --filter LOW HIGH + Used for keeping only the entries that have a number + falling in the given range. + -F LOW HIGH, --reverse-filter LOW HIGH + Used for excluding the entries that have a number + falling in the given range. + -e EXCLUDE, --exclude EXCLUDE + Used to exclude an entry that contains a specific + number. + -r, --reverse Returns in reversed order. 
+ -t {digit,int,float,version,ver}, --number-type {digit,int,float,version,ver} + Choose the type of number to search for. "float" will + search for floating-point numbers. "int" will only + search for integers. "digit", "version", and "ver" are + shortcuts for "int" with --nosign. + --nosign Do not consider "+" or "-" as part of a number, i.e. + do not take sign into consideration. + --noexp Do not consider an exponential as part of a number, + i.e. 1e4, would be considered as 1, "e", and 4, not as + 10000. This only effects the --number-type=float. + +Description +----------- + +``natsort`` was originally written to aid in computational chemistry +research so that it would be easy to analyze large sets of output files +named after the parameter used:: + + $ ls *.out + mode1000.35.out mode1243.34.out mode744.43.out mode943.54.out + +(Obviously, in reality there would be more files, but you get the idea.) Notice +that the shell sorts in lexicographical order. This is the behavior of programs like +``find`` as well as ``ls``. The problem is that passing these files to an +analysis program causes them not to appear in numerical order, which can lead +to bad analysis. To remedy this, use ``natsort``:: + + $ natsort *.out + mode744.43.out + mode943.54.out + mode1000.35.out + mode1243.34.out + $ natsort *.out | xargs your_program + +You can also place natsort in the middle of a pipe:: + + $ find . -name "*.out" | natsort | xargs your_program + +To sort version numbers, use the ``--number-type version`` option +(or ``-t ver`` for short):: + + $ ls * + prog-1.10.zip prog-1.9.zip prog-2.0.zip + $ natsort -t ver * + prog-1.9.zip + prog-1.10.zip + prog-2.0.zip + +In general, all ``natsort`` shell script options mirror the :func:`~natsorted` API, +with the notable exception of the ``--filter``, ``--reverse-filter``, and ``--exclude`` +options. 
These three options are used as follows:: + + $ ls *.out + mode1000.35.out mode1243.34.out mode744.43.out mode943.54.out + $ natsort *.out -f 900 1100 # Select only numbers between 900-1100 + mode943.54.out + mode1000.35.out + $ natsort *.out -F 900 1100 # Select only numbers NOT between 900-1100 + mode744.43.out + mode1243.34.out + $ natsort *.out -e 1000.35 # Exclude 1000.35 from search + mode744.43.out + mode943.54.out + mode1243.34.out + +If you are sorting paths with OS-generated filenames, you may require the +``--paths``/``-p`` option:: + + $ find . ! -path . -type f + ./folder/file (1).txt + ./folder/file.txt + ./folder (1)/file.txt + ./folder (10)/file.txt + ./folder (2)/file.txt + $ find . ! -path . -type f | natsort + ./folder (1)/file.txt + ./folder (2)/file.txt + ./folder (10)/file.txt + ./folder/file (1).txt + ./folder/file.txt + $ find . ! -path . -type f | natsort -p + ./folder/file.txt + ./folder/file (1).txt + ./folder (1)/file.txt + ./folder (2)/file.txt + ./folder (10)/file.txt diff --git a/docs/source/solar/NEWS.txt b/docs/source/solar/NEWS.txt new file mode 100644 index 0000000..d9743ee --- /dev/null +++ b/docs/source/solar/NEWS.txt @@ -0,0 +1,32 @@ +News +==== + +1.3 +--- +* Release date: 2012-11-01. +* Source Code Pro is now used for code samples. +* Reduced font size of pre elements. +* Horizontal rule for header elements. +* HTML pre contents are now wrapped (no scrollbars). +* Changed permalink color from black to a lighter one. + +1.2 +--- +* Release date: 2012-10-03. +* Style additional admonition levels. +* Increase padding for navigation links (minor). +* Add shadow for admonition items (minor). + +1.1 +--- +* Release date: 2012-09-05. +* Add a new background. +* Revert font of headings to Open Sans Light. +* Darker color for h3 - h6. +* Removed dependency on solarized dark pygments style. +* Nice looking scrollbars for pre element. + +1.0 +--- +* Release date: 2012-08-24. +* Initial release. 
diff --git a/docs/source/solar/README.rst b/docs/source/solar/README.rst new file mode 100644 index 0000000..caeedbd --- /dev/null +++ b/docs/source/solar/README.rst @@ -0,0 +1,28 @@ +Solar theme for Python Sphinx +============================= +Solar is an attempt to create a theme for Sphinx based on the `Solarized `_ color scheme. + +Preview +------- +http://vimalkumar.in/sphinx-themes/solar + +Download +-------- +Released versions are available from http://github.com/vkvn/sphinx-themes/downloads + +Installation +------------ +#. Extract the archive. +#. Modify ``conf.py`` of an existing Sphinx project or create new project using ``sphinx-quickstart``. +#. Change the ``html_theme`` parameter to ``solar``. +#. Change the ``html_theme_path`` to the location containing the extracted archive. + +License +------- +`GNU General Public License `_. + +Credits +------- +Modified from the default Sphinx theme -- Sphinxdoc + +Background pattern from http://subtlepatterns.com. diff --git a/docs/source/solar/layout.html b/docs/source/solar/layout.html new file mode 100644 index 0000000..6c57110 --- /dev/null +++ b/docs/source/solar/layout.html @@ -0,0 +1,32 @@ +{% extends "basic/layout.html" %} + +{%- block doctype -%} + +{%- endblock -%} + +{%- block extrahead -%} + + +{%- endblock -%} + +{# put the sidebar before the body #} +{% block sidebar1 %}{{ sidebar() }}{% endblock %} +{% block sidebar2 %}{% endblock %} + +{%- block footer %} + +{%- endblock %} diff --git a/docs/source/solar/static/solar.css b/docs/source/solar/static/solar.css new file mode 100644 index 0000000..15b5ade --- /dev/null +++ b/docs/source/solar/static/solar.css @@ -0,0 +1,344 @@ +/* solar.css + * Modified from sphinxdoc.css of the sphinxdoc theme. 
+*/ + +@import url("basic.css"); + +/* -- page layout ----------------------------------------------------------- */ + +body { + font-family: 'Open Sans', sans-serif; + font-size: 14px; + line-height: 150%; + text-align: center; + color: #002b36; + padding: 0; + margin: 0px 80px 0px 80px; + min-width: 740px; + -moz-box-shadow: 0px 0px 10px #93a1a1; + -webkit-box-shadow: 0px 0px 10px #93a1a1; + box-shadow: 0px 0px 10px #93a1a1; + background: url("subtle_dots.png") repeat; + +} + +div.document { + background-color: #fcfcfc; + text-align: left; + background-repeat: repeat-x; +} + +div.bodywrapper { + margin: 0 240px 0 0; + border-right: 1px dotted #eee8d5; +} + +div.body { + background-color: white; + margin: 0; + padding: 0.5em 20px 20px 20px; +} + +div.related { + font-size: 1em; + background: #002b36; + color: #839496; + padding: 5px 0px; +} + +div.related ul { + height: 2em; + margin: 2px; +} + +div.related ul li { + margin: 0; + padding: 0; + height: 2em; + float: left; +} + +div.related ul li.right { + float: right; + margin-right: 5px; +} + +div.related ul li a { + margin: 0; + padding: 2px 5px; + line-height: 2em; + text-decoration: none; + color: #839496; +} + +div.related ul li a:hover { + background-color: #073642; + -webkit-border-radius: 2px; + -moz-border-radius: 2px; + border-radius: 2px; +} + +div.sphinxsidebarwrapper { + padding: 0; +} + +div.sphinxsidebar { + margin: 0; + padding: 0.5em 15px 15px 0; + width: 210px; + float: right; + font-size: 0.9em; + text-align: left; +} + +div.sphinxsidebar h3, div.sphinxsidebar h4 { + margin: 1em 0 0.5em 0; + font-size: 1em; + padding: 0.7em; + background-color: #eeeff1; +} + +div.sphinxsidebar h3 a { + color: #2E3436; +} + +div.sphinxsidebar ul { + padding-left: 1.5em; + margin-top: 7px; + padding: 0; + line-height: 150%; + color: #586e75; +} + +div.sphinxsidebar ul ul { + margin-left: 20px; +} + +div.sphinxsidebar input { + border: 1px solid #eee8d5; +} + +div.footer { + background-color: #93a1a1; + color: 
#eee; + padding: 3px 8px 3px 0; + clear: both; + font-size: 0.8em; + text-align: right; +} + +div.footer a { + color: #eee; + text-decoration: none; +} + +/* -- body styles ----------------------------------------------------------- */ + +p { + margin: 0.8em 0 0.5em 0; +} + +div.body a, div.sphinxsidebarwrapper a { + color: #268bd2; + text-decoration: none; +} + +div.body a:hover, div.sphinxsidebarwrapper a:hover { + border-bottom: 1px solid #268bd2; +} + +h1, h2, h3, h4, h5, h6 { + font-family: "Open Sans", sans-serif; + font-weight: 300; +} + +h1 { + margin: 0; + padding: 0.7em 0 0.3em 0; + line-height: 1.2em; + color: #002b36; + text-shadow: #eee 0.1em 0.1em 0.1em; +} + +h2 { + margin: 1.3em 0 0.2em 0; + padding: 0 0 10px 0; + color: #073642; + border-bottom: 1px solid #eee; +} + +h3 { + margin: 1em 0 -0.3em 0; + padding-bottom: 5px; +} + +h3, h4, h5, h6 { + color: #073642; + border-bottom: 1px dotted #eee; +} + +div.body h1 a, div.body h2 a, div.body h3 a, div.body h4 a, div.body h5 a, div.body h6 a { + color: #657B83!important; +} + +h1 a.anchor, h2 a.anchor, h3 a.anchor, h4 a.anchor, h5 a.anchor, h6 a.anchor { + display: none; + margin: 0 0 0 0.3em; + padding: 0 0.2em 0 0.2em; + color: #aaa!important; +} + +h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor, +h5:hover a.anchor, h6:hover a.anchor { + display: inline; +} + +h1 a.anchor:hover, h2 a.anchor:hover, h3 a.anchor:hover, h4 a.anchor:hover, +h5 a.anchor:hover, h6 a.anchor:hover { + color: #777; + background-color: #eee; +} + +a.headerlink { + color: #c60f0f!important; + font-size: 1em; + margin-left: 6px; + padding: 0 4px 0 4px; + text-decoration: none!important; +} + +a.headerlink:hover { + background-color: #ccc; + color: white!important; +} + + +cite, code, tt { + font-family: 'Source Code Pro', monospace; + font-size: 0.9em; + letter-spacing: 0.01em; + background-color: #eeeff2; + font-style: normal; +} + +hr { + border: 1px solid #eee; + margin: 2em; +} + +.highlight { + 
-webkit-border-radius: 2px; + -moz-border-radius: 2px; + border-radius: 2px; +} + +pre { + font-family: 'Source Code Pro', monospace; + font-style: normal; + font-size: 0.9em; + letter-spacing: 0.015em; + line-height: 120%; + padding: 0.7em; + white-space: pre-wrap; /* css-3 */ + white-space: -moz-pre-wrap; /* Mozilla, since 1999 */ + white-space: -pre-wrap; /* Opera 4-6 */ + white-space: -o-pre-wrap; /* Opera 7 */ + word-wrap: break-word; /* Internet Explorer 5.5+ */ +} + +pre a { + color: inherit; + text-decoration: underline; +} + +td.linenos pre { + padding: 0.5em 0; +} + +div.quotebar { + background-color: #f8f8f8; + max-width: 250px; + float: right; + padding: 2px 7px; + border: 1px solid #ccc; +} + +div.topic { + background-color: #f8f8f8; +} + +table { + border-collapse: collapse; + margin: 0 -0.5em 0 -0.5em; +} + +table td, table th { + padding: 0.2em 0.5em 0.2em 0.5em; +} + +div.admonition { + font-size: 0.9em; + margin: 1em 0 1em 0; + border: 1px solid #eee; + background-color: #f7f7f7; + padding: 0; + -moz-box-shadow: 0px 8px 6px -8px #93a1a1; + -webkit-box-shadow: 0px 8px 6px -8px #93a1a1; + box-shadow: 0px 8px 6px -8px #93a1a1; +} + +div.admonition p { + margin: 0.5em 1em 0.5em 1em; + padding: 0.2em; +} + +div.admonition pre { + margin: 0.4em 1em 0.4em 1em; +} + +div.admonition p.admonition-title +{ + margin: 0; + padding: 0.2em 0 0.2em 0.6em; + color: white; + border-bottom: 1px solid #eee8d5; + font-weight: bold; + background-color: #268bd2; +} + +div.warning p.admonition-title, +div.important p.admonition-title { + background-color: #cb4b16; +} + +div.hint p.admonition-title, +div.tip p.admonition-title { + background-color: #859900; +} + +div.caution p.admonition-title, +div.attention p.admonition-title, +div.danger p.admonition-title, +div.error p.admonition-title { + background-color: #dc322f; +} + +div.admonition ul, div.admonition ol { + margin: 0.1em 0.5em 0.5em 3em; + padding: 0; +} + +div.versioninfo { + margin: 1em 0 0 0; + border: 1px 
solid #eee; + background-color: #DDEAF0; + padding: 8px; + line-height: 1.3em; + font-size: 0.9em; +} + +div.viewcode-block:target { + background-color: #f4debf; + border-top: 1px solid #eee; + border-bottom: 1px solid #eee; +} diff --git a/docs/source/solar/static/solarized-dark.css b/docs/source/solar/static/solarized-dark.css new file mode 100644 index 0000000..6ebb945 --- /dev/null +++ b/docs/source/solar/static/solarized-dark.css @@ -0,0 +1,84 @@ +/* solarized dark style for solar theme */ + +/*style pre scrollbar*/ +pre::-webkit-scrollbar, .highlight::-webkit-scrollbar { + height: 0.5em; + background: #073642; +} + +pre::-webkit-scrollbar-thumb { + border-radius: 1em; + background: #93a1a1; +} + +/* pygments style */ +.highlight .hll { background-color: #ffffcc } +.highlight { background: #002B36!important; color: #93A1A1 } +.highlight .c { color: #586E75 } /* Comment */ +.highlight .err { color: #93A1A1 } /* Error */ +.highlight .g { color: #93A1A1 } /* Generic */ +.highlight .k { color: #859900 } /* Keyword */ +.highlight .l { color: #93A1A1 } /* Literal */ +.highlight .n { color: #93A1A1 } /* Name */ +.highlight .o { color: #859900 } /* Operator */ +.highlight .x { color: #CB4B16 } /* Other */ +.highlight .p { color: #93A1A1 } /* Punctuation */ +.highlight .cm { color: #586E75 } /* Comment.Multiline */ +.highlight .cp { color: #859900 } /* Comment.Preproc */ +.highlight .c1 { color: #586E75 } /* Comment.Single */ +.highlight .cs { color: #859900 } /* Comment.Special */ +.highlight .gd { color: #2AA198 } /* Generic.Deleted */ +.highlight .ge { color: #93A1A1; font-style: italic } /* Generic.Emph */ +.highlight .gr { color: #DC322F } /* Generic.Error */ +.highlight .gh { color: #CB4B16 } /* Generic.Heading */ +.highlight .gi { color: #859900 } /* Generic.Inserted */ +.highlight .go { color: #93A1A1 } /* Generic.Output */ +.highlight .gp { color: #93A1A1 } /* Generic.Prompt */ +.highlight .gs { color: #93A1A1; font-weight: bold } /* Generic.Strong */ 
+.highlight .gu { color: #CB4B16 } /* Generic.Subheading */ +.highlight .gt { color: #93A1A1 } /* Generic.Traceback */ +.highlight .kc { color: #CB4B16 } /* Keyword.Constant */ +.highlight .kd { color: #268BD2 } /* Keyword.Declaration */ +.highlight .kn { color: #859900 } /* Keyword.Namespace */ +.highlight .kp { color: #859900 } /* Keyword.Pseudo */ +.highlight .kr { color: #268BD2 } /* Keyword.Reserved */ +.highlight .kt { color: #DC322F } /* Keyword.Type */ +.highlight .ld { color: #93A1A1 } /* Literal.Date */ +.highlight .m { color: #2AA198 } /* Literal.Number */ +.highlight .s { color: #2AA198 } /* Literal.String */ +.highlight .na { color: #93A1A1 } /* Name.Attribute */ +.highlight .nb { color: #B58900 } /* Name.Builtin */ +.highlight .nc { color: #268BD2 } /* Name.Class */ +.highlight .no { color: #CB4B16 } /* Name.Constant */ +.highlight .nd { color: #268BD2 } /* Name.Decorator */ +.highlight .ni { color: #CB4B16 } /* Name.Entity */ +.highlight .ne { color: #CB4B16 } /* Name.Exception */ +.highlight .nf { color: #268BD2 } /* Name.Function */ +.highlight .nl { color: #93A1A1 } /* Name.Label */ +.highlight .nn { color: #93A1A1 } /* Name.Namespace */ +.highlight .nx { color: #93A1A1 } /* Name.Other */ +.highlight .py { color: #93A1A1 } /* Name.Property */ +.highlight .nt { color: #268BD2 } /* Name.Tag */ +.highlight .nv { color: #268BD2 } /* Name.Variable */ +.highlight .ow { color: #859900 } /* Operator.Word */ +.highlight .w { color: #93A1A1 } /* Text.Whitespace */ +.highlight .mf { color: #2AA198 } /* Literal.Number.Float */ +.highlight .mh { color: #2AA198 } /* Literal.Number.Hex */ +.highlight .mi { color: #2AA198 } /* Literal.Number.Integer */ +.highlight .mo { color: #2AA198 } /* Literal.Number.Oct */ +.highlight .sb { color: #586E75 } /* Literal.String.Backtick */ +.highlight .sc { color: #2AA198 } /* Literal.String.Char */ +.highlight .sd { color: #93A1A1 } /* Literal.String.Doc */ +.highlight .s2 { color: #2AA198 } /* Literal.String.Double */ 
+.highlight .se { color: #CB4B16 } /* Literal.String.Escape */ +.highlight .sh { color: #93A1A1 } /* Literal.String.Heredoc */ +.highlight .si { color: #2AA198 } /* Literal.String.Interpol */ +.highlight .sx { color: #2AA198 } /* Literal.String.Other */ +.highlight .sr { color: #DC322F } /* Literal.String.Regex */ +.highlight .s1 { color: #2AA198 } /* Literal.String.Single */ +.highlight .ss { color: #2AA198 } /* Literal.String.Symbol */ +.highlight .bp { color: #268BD2 } /* Name.Builtin.Pseudo */ +.highlight .vc { color: #268BD2 } /* Name.Variable.Class */ +.highlight .vg { color: #268BD2 } /* Name.Variable.Global */ +.highlight .vi { color: #268BD2 } /* Name.Variable.Instance */ +.highlight .il { color: #2AA198 } /* Literal.Number.Integer.Long */ diff --git a/docs/source/solar/static/subtle_dots.png b/docs/source/solar/static/subtle_dots.png new file mode 100644 index 0000000..bb2d611 Binary files /dev/null and b/docs/source/solar/static/subtle_dots.png differ diff --git a/docs/source/solar/theme.conf b/docs/source/solar/theme.conf new file mode 100644 index 0000000..d8fc2f3 --- /dev/null +++ b/docs/source/solar/theme.conf @@ -0,0 +1,4 @@ +[theme] +inherit = basic +stylesheet = solar.css +pygments_style = none diff --git a/docs/source/versorted.rst b/docs/source/versorted.rst new file mode 100644 index 0000000..6f88597 --- /dev/null +++ b/docs/source/versorted.rst @@ -0,0 +1,8 @@ +.. default-domain:: py +.. currentmodule:: natsort + +:func:`~natsort.versorted` +========================== + +.. autofunction:: versorted + diff --git a/natsort/natsort.py b/natsort/natsort.py index bb26ebe..47ca54b 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -245,7 +245,7 @@ def natsort_key(val, key=None, number_type=float, signed=True, exp=True, It is designed for use in passing to the 'sorted' builtin or 'sort' attribute of lists. - .. note:: Depreciation Notice (3.4.0) + .. note:: Deprecated since version 3.4.0. 
This function remains in the publicly exposed API for backwards-compatibility reasons, but future development should use the newer `natsort_keygen` function. It is @@ -267,7 +267,7 @@ def natsort_key(val, key=None, number_type=float, signed=True, exp=True, number_type : {{None, float, int}}, optional The types of number to sort on: `float` searches for floating - point numbers, `int` searches for integers, and `None `searches + point numbers, `int` searches for integers, and `None` searches for digits (like integers but does not take into account negative sign). `None` is a shortcut for `number_type = int` and `signed = False`. @@ -288,7 +288,7 @@ def natsort_key(val, key=None, number_type=float, signed=True, exp=True, as_path : {{True, False}}, optional This option will force strings to be interpreted as filesystem paths, so they will be split according to the filesystem separator - (i.e. '/' on UNIX, '\\' on Windows), as well as splitting on the + (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the file extension, if any. Without this, lists of file paths like ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted properly; ``'Folder'`` will be placed at the end, not at the front. @@ -383,7 +383,7 @@ def natsort_keygen(key=None, number_type=float, signed=True, exp=True, number_type : {{None, float, int}}, optional The types of number to sort on: `float` searches for floating - point numbers, `int` searches for integers, and `None `searches + point numbers, `int` searches for integers, and `None` searches for digits (like integers but does not take into account negative sign). `None` is a shortcut for `number_type = int` and `signed = False`. @@ -404,7 +404,7 @@ def natsort_keygen(key=None, number_type=float, signed=True, exp=True, as_path : {{True, False}}, optional This option will force strings to be interpreted as filesystem paths, so they will be split according to the filesystem separator - (i.e. 
'/' on UNIX, '\\' on Windows), as well as splitting on the + (i.e. `/` on UNIX, `\\\\` on Windows), as well as splitting on the file extension, if any. Without this, lists with file paths like ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be sorted properly; ``'Folder'`` will be placed at the end, not at the front. @@ -480,7 +480,7 @@ def natsorted(seq, key=None, number_type=float, signed=True, exp=True, number_type : {{None, float, int}}, optional The types of number to sort on: `float` searches for floating - point numbers, `int` searches for integers, and `None `searches + point numbers, `int` searches for integers, and `None` searches for digits (like integers but does not take into account negative sign). `None` is a shortcut for `number_type = int` and `signed = False`. @@ -505,7 +505,7 @@ def natsorted(seq, key=None, number_type=float, signed=True, exp=True, as_path : {{True, False}}, optional This option will force strings to be interpreted as filesystem paths, so they will be split according to the filesystem separator - (i.e. '/' on UNIX, '\\' on Windows), as well as splitting on the + (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the file extension, if any. Without this, lists of file paths like ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted properly; ``'Folder'`` will be placed at the end, not at the front. @@ -518,6 +518,7 @@ def natsorted(seq, key=None, number_type=float, signed=True, exp=True, See Also -------- + natsort_keygen : Generates the key that makes natural sorting possible. versorted : A wrapper for ``natsorted(seq, number_type=None)``. index_natsorted : Returns the sorted indexes from `natsorted`. @@ -572,7 +573,7 @@ def versorted(seq, key=None, reverse=False, as_path=False): as_path : {{True, False}}, optional This option will force strings to be interpreted as filesystem paths, so they will be split according to the filesystem separator - (i.e. 
'/' on UNIX, '\\' on Windows), as well as splitting on the + (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the file extension, if any. Without this, lists of file paths like ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted properly; ``'Folder'`` will be placed at the end, not at the front. @@ -622,7 +623,7 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True, number_type : {{None, float, int}}, optional The types of number to sort on: `float` searches for floating - point numbers, `int` searches for integers, and `None `searches + point numbers, `int` searches for integers, and `None` searches for digits (like integers but does not take into account negative sign). `None` is a shortcut for `number_type = int` and `signed = False`. @@ -647,7 +648,7 @@ def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True, as_path : {{True, False}}, optional This option will force strings to be interpreted as filesystem paths, so they will be split according to the filesystem separator - (i.e. '/' on UNIX, '\\' on Windows), as well as splitting on the + (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the file extension, if any. Without this, lists of file paths like ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted properly; ``'Folder'`` will be placed at the end, not at the front. @@ -735,7 +736,7 @@ def index_versorted(seq, key=None, reverse=False, as_path=False): as_path : {{True, False}}, optional This option will force strings to be interpreted as filesystem paths, so they will be split according to the filesystem separator - (i.e. '/' on UNIX, '\\' on Windows), as well as splitting on the + (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the file extension, if any. Without this, lists of file paths like ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted properly; ``'Folder'`` will be placed at the end, not at the front. 
diff --git a/setup.cfg b/setup.cfg index 98a5fe7..a5b0345 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,6 +8,8 @@ formats = zip,gztar flakes-ignore = natsort/py23compat.py UndefinedName natsort/__init__.py UnusedImport + docs/source/conf.py ALL pep8ignore = test_natsort/test_natsort.py E501 E241 E221 + docs/source/conf.py ALL diff --git a/setup.py b/setup.py index e534448..7472974 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,9 @@ class PyTest(TestCommand): import pytest err1 = pytest.main(['--cov', 'natsort', '--flakes', '--pep8']) err2 = pytest.main(['--doctest-modules', 'natsort']) - err3 = pytest.main(['README.rst']) + err3 = pytest.main(['README.rst', + 'docs/source/intro.rst', + 'docs/source/examples.rst']) return err1 | err2 | err3 -- cgit v1.2.1 From d7c0094ad2ca0013652591b6b92b7a3328da5c2c Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Sat, 19 Jul 2014 01:45:49 -0700 Subject: Fixed PEP8 errors on comments. Also moved Sphinx and numpydoc install to before_deploy because it only needs to be performed when the documentation is being deployed.
--- .travis.yml | 3 +-- natsort/natsort.py | 4 ++-- setup.py | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6b96f68..6790232 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,8 +8,6 @@ python: install: - pip install pytest-cov pytest-flakes pytest-pep8 - pip install coveralls -- pip install wheel -- pip install Sphinx numpydoc - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install argparse; fi script: - python -m pytest --cov natsort --flakes --pep8 @@ -19,6 +17,7 @@ script: after_success: coveralls before_deploy: +- pip install Sphinx numpydoc # Only needed to install for deploying - python setup.py build_sphinx deploy: provider: pypi diff --git a/natsort/natsort.py b/natsort/natsort.py index 47ca54b..c67ec3f 100644 --- a/natsort/natsort.py +++ b/natsort/natsort.py @@ -25,8 +25,8 @@ from warnings import warn from .py23compat import u_format, py23_str, py23_zip -__doc__ = u_format(__doc__) # Make sure the doctest works for either - # python2 or python3 +# Make sure the doctest works for either python2 or python3 +__doc__ = u_format(__doc__) # The regex that locates floats float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)') diff --git a/setup.py b/setup.py index 7472974..10e52b5 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ class PyTest(TestCommand): self.test_suite = True def run_tests(self): - #import here, cause outside the eggs aren't loaded + # import here, cause outside the eggs aren't loaded import pytest err1 = pytest.main(['--cov', 'natsort', '--flakes', '--pep8']) err2 = pytest.main(['--doctest-modules', 'natsort']) -- cgit v1.2.1 From 7f6c833885c8ae2ef8d28ddb018502961ece3f87 Mon Sep 17 00:00:00 2001 From: Seth M Morton Date: Sat, 19 Jul 2014 02:22:38 -0700 Subject: Bumped version, finalized .travis.yml. Reduced the number of iterations in the stress test, and changed the deployment so that it only occurs for python 3.3. 
--- .travis.yml | 4 +++- README.rst | 2 +- docs/source/changelog.rst | 2 +- natsort/_version.py | 2 +- test_natsort/stress_natsort.py | 12 ++++++------ 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6790232..72df9c9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,7 +17,7 @@ script: after_success: coveralls before_deploy: -- pip install Sphinx numpydoc # Only needed to install for deploying +- pip install Sphinx numpydoc - python setup.py build_sphinx deploy: provider: pypi @@ -26,6 +26,8 @@ deploy: secure: OaYQtVh4mGT0ozN7Ar2lSm2IEVMKIyvOESGPGLwVyVxPqp6oC101MovJ7041bZdjMzirMs54EJwtEGQpKFmDBGcKgbjPiYId5Nqb/yDhLC/ojgarbLoFJvUKV6dWJePyY7EOycrqcMdiDabdG80Bw4zziQExbmIOdUiscsAVVmA= on: tags: true + all_branches: true repo: SethMMorton/natsort + python: 3.3 distributions: "sdist bdist_wheel" docs_dir: build/sphinx/html diff --git a/README.rst b/README.rst index 973e442..ee5fc1f 100644 --- a/README.rst +++ b/README.rst @@ -105,7 +105,7 @@ History These are the last three entries of the changelog. See the package documentation for the complete changelog. -XX-XX-2014 v. 3.4.0 +07-19-2014 v. 3.4.0 ''''''''''''''''''' - Fixed a bug that caused user's options to the 'natsort_key' to not be diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 936a92b..807bfe5 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -3,7 +3,7 @@ Changelog --------- -XX-XX-2014 v. 3.4.0 +07-19-2014 v. 
3.4.0 ''''''''''''''''''' - Fixed a bug that caused user's options to the 'natsort_key' to not be diff --git a/natsort/_version.py b/natsort/_version.py index 1c3ba50..d220a20 100644 --- a/natsort/_version.py +++ b/natsort/_version.py @@ -2,4 +2,4 @@ from __future__ import (print_function, division, unicode_literals, absolute_import) -__version__ = '3.3.0' +__version__ = '3.4.0' diff --git a/test_natsort/stress_natsort.py b/test_natsort/stress_natsort.py index c890e5f..7237db3 100644 --- a/test_natsort/stress_natsort.py +++ b/test_natsort/stress_natsort.py @@ -11,10 +11,10 @@ from natsort.py23compat import py23_range def test_random(): - """Try to sort 1,000,000 randomly generated strings without exception.""" + """Try to sort 100,000 randomly generated strings without exception.""" - # Repeat test 1,000,000 times - for _ in py23_range(1000000): + # Repeat test 100,000 times + for _ in py23_range(100000): # Made a list of five randomly generated strings lst = [''.join(sample(printable, randint(7, 30))) for __ in py23_range(5)] @@ -28,12 +28,12 @@ def test_random(): def test_similar(): - """Try to sort 1,000,000 randomly generated + """Try to sort 100,000 randomly generated similar strings without exception. """ - # Repeat test 1,000,000 times - for _ in py23_range(1000000): + # Repeat test 100,000 times + for _ in py23_range(100000): # Create a randomly generated string base = sample(printable, randint(7, 30)) # Make a list of strings based on this string, -- cgit v1.2.1