Handle null values when sorting

One unfortunate change (or fortunate, depending on how you look at types) in Python 3 is the inability to sort iterables of different types. For example: >>> x = ['foo', 'bar', None, 'qux'] >>> sorted(x) Traceback (most recent call last): File "<stdin>", line 1, in <module> TypeError: '<' not supported between instances of 'NoneType' and 'str' Fortunately, we can take advantage of the fact that by providing a function for the 'key' that returns a tuple, we can sort on multiple conditions. In this case, "when the first key returns that two elements are equal, the second key is used to compare." [1] We can use this to first separate the values by whether they are None or not, punting those that are not to the end, before sorting the non-None values normally. For example: >>> x = ['foo', 'bar', None, 'qux'] >>> sorted(x, key=lambda k: (k is None, k)) ['bar', 'foo', 'qux', None] We were already using this feature implicitly through our use of 'operator.itemgetter(*indexes)', which will return a tuple if there is more than one item in 'indexes', and now we simply make that explicit, fixing the case where we're attempting to compare a comparable type with None. For all other cases, such as comparing a value that isn't comparable, we surround things with a try-catch and a debug logging statement to allow things to continue. Note that we could optimize what we're done further by building a key value that covers all indexes, rather than using a for loop to do so. For example: >>> x = [('baz', 2), (None, 0), ('bar', 3), ('baz', 4), ('qux', 0)] >>> sorted(x, key=lambda k: list( ... itertools.chain((k[i] is None, k[i]) for i in (0, 1))) ... ) [('bar', 3), ('baz', 2), ('baz', 4), ('qux', 0), (None, 0)] However, this would be harder to grok and would also mean we're unable to handle exceptions on a single column where e.g. there are mixed types or types that are not comparable while still sorting on the other columns. Perhaps this would be desirable for some users, but sorting on a best-effort basis does seem wiser and generally more user friendly. Anyone that wants to sort on such columns should ensure their types are comparable or implement their own sorting implementation. [1] https://www.kite.com/python/answers/how-to-sort-by-two-keys-in-python Change-Id: I4803051a6dd05c143a15923254af97e32cd39693 Signed-off-by: Stephen Finucane <sfinucan@redhat.com> Story: 2008456 Task: 41466
author: Stephen Finucane <sfinucan@redhat.com> 2021-01-27 17:31:22 +0000
committer: Stephen Finucane <sfinucan@redhat.com> 2021-01-29 15:40:31 +0000
commit: 4f45f9a30e657265c5b5ac119af3aaa0e5ec7184 (patch)
tree: 7215bf33e3a1cd3778596122e6b7f7674bbb2295
parent: 36134739396213fa11d1c4ca1e9cacb8f40215eb (diff)
download: cliff-4f45f9a30e657265c5b5ac119af3aaa0e5ec7184.tar.gz
3 files changed, 80 insertions, 7 deletions
diff --git a/cliff/lister.py b/cliff/lister.py
index cfba233..dde0f2d 100644
--- a/cliff/lister.py
+++ b/cliff/lister.py
@@ -13,7 +13,7 @@
 """Application base class for providing a list of data as output.
 """
 import abc
-import operator
+import logging
 
 from . import display
 
@@ -22,6 +22,8 @@ class Lister(display.DisplayCommandBase, metaclass=abc.ABCMeta):
     """Command base class for providing a list of data as output.
     """
 
+    log = logging.getLogger(__name__)
+
     @property
     def formatter_namespace(self):
         return 'cliff.formatter.list'
@@ -63,8 +65,25 @@ class Lister(display.DisplayCommandBase, metaclass=abc.ABCMeta):
         if parsed_args.sort_columns and self.need_sort_by_cliff:
             indexes = [column_names.index(c) for c in parsed_args.sort_columns
                        if c in column_names]
-            if indexes:
-                data = sorted(data, key=operator.itemgetter(*indexes))
+            for index in indexes[::-1]:
+                try:
+                    # We need to handle unset values (i.e. None) so we sort on
+                    # multiple conditions: the first comparing the results of
+                    # an 'is None' type check and the second comparing the
+                    # actual value. The second condition will only be checked
+                    # if the first returns True, which only happens if the
+                    # returns from the 'is None' check on the two values are
+                    # the same, i.e. both None or both not-None
+                    data = sorted(
+                        data, key=lambda k: (k[index] is None, k[index]),
+                    )
+                except TypeError:
+                    # Simply log and then ignore this; sorting is best effort
+                    self.log.warning(
+                        "Could not sort on field '%s'; unsortable types",
+                        parsed_args.sort_columns[index],
+                    )
+
         (columns_to_include, selector) = self._generate_columns_and_selector(
             parsed_args, column_names)
         if selector:
diff --git a/cliff/tests/test_lister.py b/cliff/tests/test_lister.py
index 8603004..7fc7222 100644
--- a/cliff/tests/test_lister.py
+++ b/cliff/tests/test_lister.py
@@ -32,16 +32,15 @@ class FauxFormatter(object):
 
 class ExerciseLister(lister.Lister):
 
+    data = [('a', 'A'), ('b', 'B'), ('c', 'A')]
+
     def _load_formatter_plugins(self):
         return {
             'test': FauxFormatter(),
         }
 
     def take_action(self, parsed_args):
-        return (
-            parsed_args.columns,
-            [('a', 'A'), ('b', 'B'), ('c', 'A')],
-        )
+        return (parsed_args.columns, self.data)
 
 
 class ExerciseListerCustomSort(ExerciseLister):
@@ -49,6 +48,16 @@ class ExerciseListerCustomSort(ExerciseLister):
     need_sort_by_cliff = False
 
 
+class ExerciseListerNullValues(ExerciseLister):
+
+    data = ExerciseLister.data + [(None, None)]
+
+
+class ExerciseListerDifferentTypes(ExerciseLister):
+
+    data = ExerciseLister.data + [(1, 0)]
+
+
 class TestLister(base.TestBase):
 
     def test_formatter_args(self):
@@ -111,6 +120,43 @@ class TestLister(base.TestBase):
         data = list(args[1])
         self.assertEqual([['a', 'A'], ['b', 'B'], ['c', 'A']], data)
 
+    def test_sort_by_column_with_null(self):
+        test_lister = ExerciseListerNullValues(mock.Mock(), [])
+        parsed_args = mock.Mock()
+        parsed_args.columns = ('Col1', 'Col2')
+        parsed_args.formatter = 'test'
+        parsed_args.sort_columns = ['Col2', 'Col1']
+
+        test_lister.run(parsed_args)
+
+        f = test_lister._formatter_plugins['test']
+        args = f.args[0]
+        data = list(args[1])
+        self.assertEqual(
+            [['a', 'A'], ['c', 'A'], ['b', 'B'], [None, None]], data)
+
+    def test_sort_by_column_with_different_types(self):
+        test_lister = ExerciseListerDifferentTypes(mock.Mock(), [])
+        parsed_args = mock.Mock()
+        parsed_args.columns = ('Col1', 'Col2')
+        parsed_args.formatter = 'test'
+        parsed_args.sort_columns = ['Col2', 'Col1']
+
+        with mock.patch.object(lister.Lister, 'log') as mock_log:
+            test_lister.run(parsed_args)
+
+        f = test_lister._formatter_plugins['test']
+        args = f.args[0]
+        data = list(args[1])
+        # The output should be unchanged
+        self.assertEqual(
+            [['a', 'A'], ['b', 'B'], ['c', 'A'], [1, 0]], data)
+        # but we should have logged a warning
+        mock_log.warning.assert_has_calls([
+            mock.call("Could not sort on field '%s'; unsortable types", col)
+            for col in parsed_args.sort_columns
+        ])
+
     def test_sort_by_non_displayed_column(self):
         test_lister = ExerciseLister(mock.Mock(), [])
         parsed_args = mock.Mock()
diff --git a/releasenotes/notes/handle-none-values-when-sorting-de40e36c66ad95ca.yaml b/releasenotes/notes/handle-none-values-when-sorting-de40e36c66ad95ca.yaml
new file mode 100644
index 0000000..a7368c1
--- /dev/null
+++ b/releasenotes/notes/handle-none-values-when-sorting-de40e36c66ad95ca.yaml
@@ -0,0 +1,8 @@
+---
+fixes:
+  - |
+    Sorting output using the ``--sort-column`` option will now handle ``None``
+    values. This was supported implicitly in Python 2 but was broken in the
+    move to Python 3. In addition, requests to sort a column containing
+    non-comparable types will now be ignored. Previously, these request would
+    result in a ``TypeError``.
author	Stephen Finucane <sfinucan@redhat.com>	2021-01-27 17:31:22 +0000
committer	Stephen Finucane <sfinucan@redhat.com>	2021-01-29 15:40:31 +0000
commit	4f45f9a30e657265c5b5ac119af3aaa0e5ec7184 (patch)
tree	7215bf33e3a1cd3778596122e6b7f7674bbb2295
parent	36134739396213fa11d1c4ca1e9cacb8f40215eb (diff)
download	cliff-4f45f9a30e657265c5b5ac119af3aaa0e5ec7184.tar.gz