Refactor split_by_op and split_by_datatype

split_by_op was returning data against which further error checking was then required. This change moves that error handling inside the method and simplifies the regular expression so that it splits (greedily) on the first operator it finds. If the split is not possible, it is a ValueError. If the field or value are empty, that is a ValueError. Both split_by_op and split_by_datatype were doing a findall() where a match() and split() do the right job and more efficiently. Regular expression compilation has been moved to the module level to insure they need only be compiled once. Operator keys must be sorted by length to ensure the point at which the split happens is most greedy. Using a split keeps the regex short and removes any statements about the left and right hand sides of the operator. Tests added to cover the method more completely, including testing for corner cases such as single character field or values or operators showing up in unexpected locations. 'string' variable renamed to 'query' and 'query_value' to avoid confusion. Named parameters on string substitution for clarity. Note that the tests which do self.assertRaises could more explicitly check the exception with self.assertRaisesRegexp but that would break compatibility with Python 2.6. Change-Id: Icd815ff65aba9eae3f76afee3bb33e85d85bea72 Closes-Bug: #1314544
author: Chris Dent <chdent@redhat.com> 2014-05-30 11:40:11 +0100
committer: Chris Dent <chdent@redhat.com> 2014-06-02 18:21:51 +0100
commit: 2a2e2dd2732240a08d7f0e1288229a39ae380d1c (patch)
tree: 121ac1959221a96f63f766c27085895c60a8828a
parent: d55038ceafdd49ac60ba1e115c331583b430f1ee (diff)
download: python-ceilometerclient-2a2e2dd2732240a08d7f0e1288229a39ae380d1c.tar.gz
2 files changed, 130 insertions, 35 deletions
diff --git a/ceilometerclient/tests/v2/test_options.py b/ceilometerclient/tests/v2/test_options.py
index 8cbf73a..e9dee54 100644
--- a/ceilometerclient/tests/v2/test_options.py
+++ b/ceilometerclient/tests/v2/test_options.py
@@ -83,9 +83,57 @@ class CliTest(utils.BaseTestCase):
                                'op': 'le', 'value': '283.347',
                                'type': ''}])
 
-    def test_invalid_seperator(self):
-        self.assertRaises(ValueError, options.cli_to_array,
-                          'this=2.4,fooo=doof')
+    def test_comma(self):
+        ar = options.cli_to_array('this=2.4,fooo=doof')
+        self.assertEqual([{'field': 'this',
+                           'op': 'eq',
+                           'value': '2.4,fooo=doof',
+                           'type': ''}],
+                         ar)
+
+    def test_special_character(self):
+        ar = options.cli_to_array('key~123=value!123')
+        self.assertEqual([{'field': 'key~123',
+                           'op': 'eq',
+                           'value': 'value!123',
+                           'type': ''}],
+                         ar)
+
+    def _do_test_typed_float_op(self, op, op_str):
+        ar = options.cli_to_array('that%sfloat::283.347' % op)
+        self.assertEqual([{'field': 'that',
+                           'type': 'float',
+                           'value': '283.347',
+                           'op': op_str}],
+                         ar)
+
+    def test_typed_float_eq(self):
+        self._do_test_typed_float_op('<', 'lt')
+
+    def test_typed_float_le(self):
+        self._do_test_typed_float_op('<=', 'le')
+
+    def test_typed_string_whitespace(self):
+        ar = options.cli_to_array('state=string::insufficient data')
+        self.assertEqual([{'field': 'state',
+                           'op': 'eq',
+                           'type': 'string',
+                           'value': 'insufficient data'}],
+                         ar)
+
+    def test_typed_string_whitespace_complex(self):
+        ar = options.cli_to_array(
+            'that>=float::99.9999;state=string::insufficient data'
+        )
+        self.assertEqual([{'field': 'that',
+                           'op': 'ge',
+                           'type': 'float',
+                           'value': '99.9999'},
+                          {'field': 'state',
+                           'op': 'eq',
+                           'type': 'string',
+                           'value': 'insufficient data'}],
+                         ar)
 
     def test_invalid_operator(self):
         self.assertRaises(ValueError, options.cli_to_array,
@@ -97,6 +145,22 @@ class CliTest(utils.BaseTestCase):
                                'op': 'le', 'value': '34',
                                'type': ''}])
 
+    def test_single_char_field_or_value(self):
+        ar = options.cli_to_array('m<=34;large.thing>s;x!=y')
+        self.assertEqual([{'field': 'm',
+                           'op': 'le',
+                           'value': '34',
+                           'type': ''},
+                          {'field': 'large.thing',
+                           'op': 'gt',
+                           'value': 's',
+                           'type': ''},
+                          {'field': 'x',
+                           'op': 'ne',
+                           'value': 'y',
+                           'type': ''}],
+                         ar)
+
     def test_without_data_type(self):
         ar = options.cli_to_array('hostname=localhost')
         self.assertEqual(ar, [{'field': 'hostname',
@@ -152,3 +216,25 @@ class CliTest(utils.BaseTestCase):
                                'op': 'eq',
                                'type': '',
                                'value': 'datetime:sometimestamp'}])
+
+    def test_missing_key(self):
+        self.assertRaises(ValueError, options.cli_to_array,
+                          'average=float::1234.0;>=string::hello')
+
+    def test_missing_value(self):
+        self.assertRaises(ValueError, options.cli_to_array,
+                          'average=float::1234.0;house>=')
+
+    def test_timestamp_value(self):
+        ar = options.cli_to_array(
+            'project=cow;timestamp>=datetime::2014-03-11T16:02:58'
+        )
+        self.assertEqual([{'field': 'project',
+                           'op': 'eq',
+                           'type': '',
+                           'value': 'cow'},
+                          {'field': 'timestamp',
+                           'op': 'ge',
+                           'type': 'datetime',
+                           'value': '2014-03-11T16:02:58'}],
+                         ar)
diff --git a/ceilometerclient/v2/options.py b/ceilometerclient/v2/options.py
index 73bd200..775ac40 100644
--- a/ceilometerclient/v2/options.py
+++ b/ceilometerclient/v2/options.py
@@ -15,6 +15,18 @@ import re
 
 from six.moves.urllib import parse
 
+OP_LOOKUP = {'!=': 'ne',
+             '>=': 'ge',
+             '<=': 'le',
+             '>': 'gt',
+             '<': 'lt',
+             '=': 'eq'}
+
+OP_LOOKUP_KEYS = '|'.join(sorted(OP_LOOKUP.keys(), key=len, reverse=True))
+OP_SPLIT_RE = re.compile(r'(%s)' % OP_LOOKUP_KEYS)
+
+DATA_TYPE_RE = re.compile(r'^(string|integer|float|datetime|boolean)(::)(.+)$')
+
 
 def build_url(path, q, params=None):
     '''This converts from a list of dicts and a list of params to
@@ -68,44 +80,41 @@ def cli_to_array(cli_query):
     if cli_query is None:
         return None
 
-    op_lookup = {'!=': 'ne',
-                 '>=': 'ge',
-                 '<=': 'le',
-                 '>': 'gt',
-                 '<': 'lt',
-                 '=': 'eq'}
-
-    def split_by_op(string):
-        # two character split (<=,!=)
-        frags = re.findall(r'([[a-zA-Z0-9_.]+)([><!]=)([^ -,\t\n\r\f\v]+)',
-                           string)
-        if len(frags) == 0:
-            # single char split (<,=)
-            frags = re.findall(r'([a-zA-Z0-9_.]+)([><=])([^ -,\t\n\r\f\v]+)',
-                               string)
-        return frags
-
-    def split_by_data_type(string):
-        frags = re.findall(r'^(string|integer|float|datetime|boolean)(::)'
-                           r'([^ -,\t\n\r\f\v]+)$', string)
-
-        # frags[1] is the separator. Return a list without it if the type
-        # identifier was found.
-        return [frags[0][0], frags[0][2]] if frags else None
+    def split_by_op(query):
+        """Split a single query string to field, operator, value.
+        """
+
+        def _value_error(message):
+            raise ValueError('invalid query %(query)s: missing %(message)s' %
+                             {'query': query, 'message': message})
+
+        try:
+            field, operator, value = OP_SPLIT_RE.split(query, maxsplit=1)
+        except ValueError:
+            _value_error('operator')
+
+        if not len(field):
+            _value_error('field')
+
+        if not len(value):
+            _value_error('value')
+
+        return (field, operator, value)
+
+    def split_by_data_type(query_value):
+        frags = DATA_TYPE_RE.match(query_value)
+
+        # The second match is the separator. Return a list without it if
+        # a type identifier was found.
+        return frags.group(1, 3) if frags else None
 
     opts = []
     queries = cli_query.split(';')
     for q in queries:
-        frag = split_by_op(q)
-        if len(frag) > 1:
-            raise ValueError('incorrect separator %s in query "%s"' %
-                             ('(should be ";")', q))
-        if len(frag) == 0:
-            raise ValueError('invalid query %s' % q)
-        query = frag[0]
+        query = split_by_op(q)
         opt = {}
         opt['field'] = query[0]
-        opt['op'] = op_lookup[query[1]]
+        opt['op'] = OP_LOOKUP[query[1]]
 
         # Allow the data type of the value to be specified via <type>::<value>,
         # where type can be one of integer, string, float, datetime, boolean
author	Chris Dent <chdent@redhat.com>	2014-05-30 11:40:11 +0100
committer	Chris Dent <chdent@redhat.com>	2014-06-02 18:21:51 +0100
commit	2a2e2dd2732240a08d7f0e1288229a39ae380d1c (patch)
tree	121ac1959221a96f63f766c27085895c60a8828a
parent	d55038ceafdd49ac60ba1e115c331583b430f1ee (diff)
download	python-ceilometerclient-2a2e2dd2732240a08d7f0e1288229a39ae380d1c.tar.gz