diff options
author | Thomas Haller <thaller@redhat.com> | 2019-04-03 17:32:59 +0200 |
---|---|---|
committer | Thomas Haller <thaller@redhat.com> | 2019-04-10 15:05:57 +0200 |
commit | 5c1f93943e39d30cbf6ebb53ccdb74d5e04d50bc (patch) | |
tree | 057e5a0c6730c0a685d2a634bcc2f89be5b3e6af | |
parent | 453b3ea3626071efccdfa2872579acf37f1e5e33 (diff) | |
download | NetworkManager-5c1f93943e39d30cbf6ebb53ccdb74d5e04d50bc.tar.gz |
shared: add NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY flag for nm_utils_strsplit_set_full()
Previously, nm_utils_strsplit_set_full() would always remove empty
tokens. Add a flag NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY to avoid
that.
With this flag, nm_utils_strsplit_set_full() returns the same result as
g_strsplit_set() and can serve as a direct replacement for it -- except
for the empty string "", where we return %NULL instead of an empty strv array.
-rw-r--r-- | libnm-core/tests/test-general.c | 341 | ||||
-rw-r--r-- | shared/nm-utils/nm-shared-utils.c | 58 | ||||
-rw-r--r-- | shared/nm-utils/nm-shared-utils.h | 14 |
3 files changed, 365 insertions, 48 deletions
diff --git a/libnm-core/tests/test-general.c b/libnm-core/tests/test-general.c index 4063dfcf20..175b48b505 100644 --- a/libnm-core/tests/test-general.c +++ b/libnm-core/tests/test-general.c @@ -259,55 +259,279 @@ test_nm_g_slice_free_fcn (void) /*****************************************************************************/ static void -_do_test_nm_utils_strsplit_set (gboolean escape, const char *str, ...) +_do_test_nm_utils_strsplit_set_f_one (NMUtilsStrsplitSetFlags flags, + const char *str, + gsize words_len, + const char *const*exp_words) { - gs_unref_ptrarray GPtrArray *args_array = g_ptr_array_new (); - const char *const*args; - gs_free const char **words = NULL; - const char *arg; - gsize i; - va_list ap; + const char *DELIMITERS = " \t\n"; +#define DELIMITERS_C ' ', '\t', '\n' - va_start (ap, str); - while ((arg = va_arg (ap, const char *))) - g_ptr_array_add (args_array, (gpointer) arg); - va_end (ap); - g_ptr_array_add (args_array, NULL); + gs_free const char **words = NULL; + gsize i, j, k; + const gboolean f_allow_escaping = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING); + const gboolean f_preserve_empty = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY); + const char *s1; + gsize initial_offset; + gs_strfreev char **words_g = NULL; + + g_assert (!NM_FLAGS_ANY (flags, ~( NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING + | NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY))); + + /* assert that the epected words are valid (and don't contain unescaped delimiters). 
*/ + for (i = 0; i < words_len; i++) { + const char *w = exp_words[i]; + + g_assert (w); + if (!f_preserve_empty) + g_assert (w[0]); + for (k = 0; w[k]; ) { + if ( f_allow_escaping + && w[k] == '\\') { + k++; + if (w[k] == '\0') + break; + k++; + continue; + } + g_assert (!NM_IN_SET (w[k], DELIMITERS_C)); + k++; + } + if (!f_allow_escaping) + g_assert (!NM_STRCHAR_ANY (w, ch, NM_IN_SET (ch, DELIMITERS_C))); + } - args = (const char *const*) args_array->pdata; + initial_offset = (f_preserve_empty || !str) + ? 0u + : strspn (str, DELIMITERS); - if (!escape && nmtst_get_rand_bool ()) - words = nm_utils_strsplit_set (str, " \t\n"); - else { - words = nm_utils_strsplit_set_full (str, - " \t\n", - escape - ? NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING - : NM_UTILS_STRSPLIT_SET_FLAGS_NONE); + /* first compare our expected values with what g_strsplit_set() would + * do. */ + words_g = str ? g_strsplit_set (str, DELIMITERS, -1) : NULL; + if (str == NULL) { + g_assert_cmpint (words_len, ==, 0); + g_assert (!words_g); + } else if (nm_streq0 (str, "")) { + g_assert_cmpint (words_len, ==, 0); + g_assert (words_g); + g_assert (!words_g[0]); + } else { + g_assert (words_g); + g_assert (words_g[0]); + if (!f_allow_escaping) { + if (!f_preserve_empty) { + for (i = 0, j = 0; words_g[i]; i++) { + if (words_g[i][0] == '\0') + g_free (words_g[i]); + else + words_g[j++] = words_g[i]; + } + words_g[j] = NULL; + } + if (f_preserve_empty) + g_assert_cmpint (words_len, >, 0); + for (i = 0; i < words_len; i++) { + g_assert (exp_words[i]); + g_assert_cmpstr (exp_words[i], ==, words_g[i]); + } + g_assert (words_g[words_len] == NULL); + g_assert_cmpint (NM_PTRARRAY_LEN (words_g), ==, words_len); + g_assert (_nm_utils_strv_cmp_n (exp_words, words_len, NM_CAST_STRV_CC (words_g), -1) == 0); + } } - if (!args[0]) { + if ( flags == NM_UTILS_STRSPLIT_SET_FLAGS_NONE + && nmtst_get_rand_bool ()) + words = nm_utils_strsplit_set (str, DELIMITERS); + else if ( flags == 
NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY + && nmtst_get_rand_bool ()) + words = nm_utils_strsplit_set_with_empty (str, DELIMITERS); + else + words = nm_utils_strsplit_set_full (str, DELIMITERS, flags); + + g_assert_cmpint (NM_PTRARRAY_LEN (words), ==, words_len); + + if (words_len == 0) { g_assert (!words); g_assert ( !str - || NM_STRCHAR_ALL (str, ch, NM_IN_SET (ch, ' ', '\t', '\n'))); + || NM_STRCHAR_ALL (str, ch, NM_IN_SET (ch, DELIMITERS_C))); return; } + g_assert (words); - for (i = 0; args[i] || words[i]; i++) { - g_assert (args[i]); - g_assert (words[i]); - g_assert (args[i][0]); - g_assert (escape || NM_STRCHAR_ALL (args[i], ch, !NM_IN_SET (ch, ' ', '\t', '\n'))); - g_assert_cmpstr (args[i], ==, words[i]); + for (i = 0; i < words_len; i++) + g_assert_cmpstr (exp_words[i], ==, words[i]); + g_assert (words[words_len] == NULL); + + g_assert (_nm_utils_strv_cmp_n (exp_words, words_len, words, -1) == 0); + + s1 = words[0]; + g_assert (s1 >= (char *) &words[words_len + 1]); + s1 = &s1[strlen (str)]; + for (i = 1; i < words_len; i++) { + g_assert (&(words[i - 1])[strlen (words[i - 1])] < words[i]); + g_assert (words[i] <= s1); + } + + /* while strsplit removes all delimiters, we can relatively easily find them + * in the original string. Assert that the original string and the pointer offsets + * of words correspond. In particular, find idx_delim_after and idx_delim_before + * to determine which delimiter was after/before a word. */ + { + gsize idx_word_start; + gsize idx_delim_after_old = G_MAXSIZE; + + idx_word_start = initial_offset; + for (i = 0; i < words_len; i++) { + const gsize l_i = strlen (words[i]); + gsize idx_delim_after; + gsize idx_delim_before; + + /* find the delimiter *after* words[i]. We can do that by looking at the next + * word and calculating the pointer difference. + * + * The delimiter after the very last word is '\0' and requires strlen() to find. 
*/ + idx_delim_after = initial_offset + ((words[i] - words[0]) + l_i); + if (idx_delim_after != idx_word_start + l_i) { + g_assert (!f_preserve_empty); + g_assert_cmpint (idx_word_start + l_i, <, idx_delim_after); + idx_word_start = idx_delim_after - l_i; + } + if (i + 1 < words_len) { + gsize x = initial_offset + ((words[i + 1] - words[0]) - 1); + + if (idx_delim_after != x) { + g_assert (!f_preserve_empty); + g_assert_cmpint (idx_delim_after, <, x); + for (k = idx_delim_after; k <= x; k++) + g_assert (NM_IN_SET (str[k], DELIMITERS_C)); + } + g_assert (NM_IN_SET (str[idx_delim_after], DELIMITERS_C)); + } else { + if (f_preserve_empty) + g_assert (NM_IN_SET (str[idx_delim_after], '\0')); + else + g_assert (NM_IN_SET (str[idx_delim_after], '\0', DELIMITERS_C)); + } + + /* find the delimiter *before* words[i]. */ + if (i == 0) { + /* there is only a delimiter *before*, with !f_preserve_empty and leading + * delimiters. */ + idx_delim_before = G_MAXSIZE; + if (initial_offset > 0) { + g_assert (!f_preserve_empty); + idx_delim_before = initial_offset - 1; + } + } else + idx_delim_before = initial_offset + (words[i] - words[0]) - 1; + if (idx_delim_before != G_MAXSIZE) + g_assert (NM_IN_SET (str[idx_delim_before], DELIMITERS_C)); + if (idx_delim_after_old != idx_delim_before) { + g_assert (!f_preserve_empty); + if (i == 0) { + g_assert_cmpint (initial_offset, >, 0); + g_assert_cmpint (idx_delim_before, !=, G_MAXSIZE); + g_assert_cmpint (idx_delim_before, ==, initial_offset - 1); + } else { + g_assert_cmpint (idx_delim_after_old, !=, G_MAXSIZE); + g_assert_cmpint (idx_delim_before, !=, G_MAXSIZE); + g_assert_cmpint (idx_delim_after_old, <, idx_delim_before); + for (k = idx_delim_after_old; k <= idx_delim_before; k++) + g_assert (NM_IN_SET (str[k], DELIMITERS_C)); + } + } + + for (k = 0; k < l_i; ) { + if ( f_allow_escaping + && str[idx_word_start + k] == '\\') { + k++; + if (k >= l_i) + break; + k++; + continue; + } + g_assert (!NM_IN_SET (str[idx_word_start + k], 
DELIMITERS_C)); + k++; + } + g_assert (strncmp (words[i], &str[idx_word_start], l_i) == 0); + + if (i > 0) { + const char *s = &(words[i - 1])[strlen (words[i - 1]) + 1]; + + if (s != words[i]) { + g_assert (!f_preserve_empty); + g_assert (s < words[i]); + } + } + + idx_word_start += l_i + 1; + idx_delim_after_old = idx_delim_after; + } } } -#define do_test_nm_utils_strsplit_set(str, ...) \ - _do_test_nm_utils_strsplit_set (str, ##__VA_ARGS__, NULL) +static void +_do_test_nm_utils_strsplit_set_f (NMUtilsStrsplitSetFlags flags, + const char *str, + gsize words_len, + const char *const*exp_words) +{ + _do_test_nm_utils_strsplit_set_f_one (flags, str, words_len, exp_words); + + if (NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY)) { + gs_unref_ptrarray GPtrArray *exp_words2 = NULL; + gsize k; + + exp_words2 = g_ptr_array_new (); + for (k = 0; k < words_len; k++) { + if (exp_words[k][0] != '\0') + g_ptr_array_add (exp_words2, (gpointer) exp_words[k]); + } + + _do_test_nm_utils_strsplit_set_f_one (flags & (~NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY), + str, + exp_words2->len, + (const char *const*) exp_words2->pdata); + } +} + +#define do_test_nm_utils_strsplit_set_f(flags, str, ...) \ + _do_test_nm_utils_strsplit_set_f (flags, \ + str, \ + NM_NARG (__VA_ARGS__), \ + NM_MAKE_STRV (__VA_ARGS__)) + +#define do_test_nm_utils_strsplit_set(allow_escaping, str, ...) \ + do_test_nm_utils_strsplit_set_f ( (allow_escaping) \ + ? 
NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING \ + : NM_UTILS_STRSPLIT_SET_FLAGS_NONE, \ + str, \ + ##__VA_ARGS__) static void test_nm_utils_strsplit_set (void) { + gs_unref_ptrarray GPtrArray *words_exp = NULL; + guint test_run; + + do_test_nm_utils_strsplit_set_f (NM_UTILS_STRSPLIT_SET_FLAGS_NONE, NULL); + do_test_nm_utils_strsplit_set_f (NM_UTILS_STRSPLIT_SET_FLAGS_NONE, ""); + do_test_nm_utils_strsplit_set_f (NM_UTILS_STRSPLIT_SET_FLAGS_NONE, " "); + do_test_nm_utils_strsplit_set_f (NM_UTILS_STRSPLIT_SET_FLAGS_NONE, "a b", "a", "b"); + + do_test_nm_utils_strsplit_set_f (NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, NULL); + do_test_nm_utils_strsplit_set_f (NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, ""); + do_test_nm_utils_strsplit_set_f (NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, " ", "", ""); + do_test_nm_utils_strsplit_set_f (NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, " ", "", "", ""); + do_test_nm_utils_strsplit_set_f (NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, "a ", "a", "", ""); + do_test_nm_utils_strsplit_set_f (NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, "a b", "a", "", "b"); + do_test_nm_utils_strsplit_set_f (NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, " ab b", "", "ab", "", "b"); + do_test_nm_utils_strsplit_set_f (NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, "ab b", "ab", "", "b"); + do_test_nm_utils_strsplit_set_f (NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, "abb", "abb"); + do_test_nm_utils_strsplit_set_f (NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, "abb bb ", "abb", "", "bb", ""); + do_test_nm_utils_strsplit_set_f (NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, "abb bcb ", "abb", "bcb", ""); + do_test_nm_utils_strsplit_set (FALSE, NULL); do_test_nm_utils_strsplit_set (FALSE, ""); do_test_nm_utils_strsplit_set (FALSE, "\t"); @@ -331,6 +555,61 @@ test_nm_utils_strsplit_set (void) do_test_nm_utils_strsplit_set (TRUE, "foo\\", "foo\\"); do_test_nm_utils_strsplit_set (TRUE, "bar foo\\", "bar", "foo\\"); do_test_nm_utils_strsplit_set (TRUE, "\\ a b\\ \\ c", "\\ 
a", "b\\ \\ ", "c"); + + words_exp = g_ptr_array_new_with_free_func (g_free); + for (test_run = 0; test_run < 100; test_run++) { + gboolean f_allow_escaping = nmtst_get_rand_bool (); + guint words_len = nmtst_get_rand_int () % 100; + gs_free char *str = NULL; + guint i; + + g_ptr_array_set_size (words_exp, 0); + for (i = 0; i < words_len; i++) { + guint word_len; + char *word; + guint j; + + word_len = nmtst_get_rand_int (); + if ((word_len % 100) < 30) + word_len = 0; + else + word_len = (word_len >> 10) % 100; + word = g_new (char, word_len + 3); + for (j = 0; j < word_len; ) { + guint32 p = nmtst_get_rand_int (); + static const char delimiters_arr[] = { DELIMITERS_C }; + static const char regular_chars[] = "abcdefghijklmnopqrstuvwxyz"; + + if ( !f_allow_escaping + || (p % 1000) < 700) { + if (((p >> 20) % 100) < 20) + word[j++] = '\\'; + word[j++] = regular_chars[(p >> 11) % (G_N_ELEMENTS (regular_chars) - 1)]; + continue; + } + word[j++] = '\\'; + word[j++] = delimiters_arr[(p >> 11) % G_N_ELEMENTS (delimiters_arr)]; + } + word[j] = '\0'; + g_ptr_array_add (words_exp, word); + } + g_ptr_array_add (words_exp, NULL); + + str = g_strjoinv (" ", (char **) words_exp->pdata); + + if ( str[0] == '\0' + && words_len > 0) { + g_assert (words_len == 1); + g_assert_cmpstr (words_exp->pdata[0], ==, ""); + words_len = 0; + } + + _do_test_nm_utils_strsplit_set_f ( (f_allow_escaping ? 
NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING : NM_UTILS_STRSPLIT_SET_FLAGS_NONE) + | NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, + str, + words_len, + (const char *const*) words_exp->pdata); + } } /*****************************************************************************/ diff --git a/shared/nm-utils/nm-shared-utils.c b/shared/nm-utils/nm-shared-utils.c index 21658ca876..e8f3563c09 100644 --- a/shared/nm-utils/nm-shared-utils.c +++ b/shared/nm-utils/nm-shared-utils.c @@ -989,17 +989,26 @@ _char_lookup_has (const guint8 lookup[static 256], * each word once (the entire strv array), but instead copies it once * and all words point into that internal copy. * - * Another difference from g_strsplit_set() is that this never returns - * empty words. Multiple delimiters are combined and treated as one. + * Note that for @str %NULL and "", this always returns %NULL too. That differs + * from g_strsplit_set(), which would return an empty strv array for "". + * + * Note that g_strsplit_set() returns empty words as well. By default, + * nm_utils_strsplit_set_full() strips all empty tokens (that is, repeated + * delimiters. With %NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, empty tokens + * are not removed. * * If @flags has %NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING, delimiters prefixed * by a backslash are not treated as a separator. Such delimiters and their escape - * character are copied to the current word without unescaping them. + * character are copied to the current word without unescaping them. In general, + * nm_utils_strsplit_set_full() does not remove any backslash escape characters + * and does not unescaping. It only considers them for skipping to split at + * an escaped delimiter. * - * Returns: %NULL if @str is %NULL or contains only delimiters. - * Otherwise, a %NULL terminated strv array containing non-empty - * words, split at the delimiter characters (delimiter characters - * are removed). + * Returns: %NULL if @str is %NULL or "". 
+ * If @str only contains delimiters and %NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY + * is not set, it also returns %NULL. + * Otherwise, a %NULL terminated strv array containing the split words. + * (delimiter characters are removed). * The strings to which the result strv array points to are allocated * after the returned result itself. Don't free the strings themself, * but free everything with g_free(). @@ -1012,12 +1021,15 @@ nm_utils_strsplit_set_full (const char *str, NMUtilsStrsplitSetFlags flags) { const char **ptr, **ptr0; - gsize alloc_size, plen, i; + gsize alloc_size; + gsize plen; + gsize i; gsize str_len; char *s0; char *s; guint8 ch_lookup[256]; const gboolean f_allow_escaping = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING); + const gboolean f_preseve_empty = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY); if (!str) return NULL; @@ -1031,11 +1043,18 @@ nm_utils_strsplit_set_full (const char *str, nm_assert ( !f_allow_escaping || !_char_lookup_has (ch_lookup, '\\')); - while (_char_lookup_has (ch_lookup, str[0])) - str++; + if (!f_preseve_empty) { + while (_char_lookup_has (ch_lookup, str[0])) + str++; + } - if (!str[0]) + if (!str[0]) { + /* We return %NULL here, also with NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY. + * That makes nm_utils_strsplit_set_full() with NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY + * different from g_strsplit_set(), which would in this case return an empty array. + * If you need to handle %NULL, and "" specially, then check the input string first. 
*/ return NULL; + } str_len = strlen (str) + 1; alloc_size = 8; @@ -1065,7 +1084,12 @@ nm_utils_strsplit_set_full (const char *str, ptr[plen++] = s; - nm_assert (s[0] && !_char_lookup_has (ch_lookup, s[0])); + if (s[0] == '\0') { + nm_assert (f_preseve_empty); + goto done; + } + nm_assert ( f_preseve_empty + || !_char_lookup_has (ch_lookup, s[0])); while (!_char_lookup_has (ch_lookup, s[0])) { if (G_UNLIKELY ( s[0] == '\\' @@ -1083,10 +1107,12 @@ nm_utils_strsplit_set_full (const char *str, nm_assert (_char_lookup_has (ch_lookup, s[0])); s[0] = '\0'; s++; - while (_char_lookup_has (ch_lookup, s[0])) - s++; - if (s[0] == '\0') - goto done; + if (!f_preseve_empty) { + while (_char_lookup_has (ch_lookup, s[0])) + s++; + if (s[0] == '\0') + goto done; + } } done: diff --git a/shared/nm-utils/nm-shared-utils.h b/shared/nm-utils/nm-shared-utils.h index 75ef429dd7..8ec6fa2f5a 100644 --- a/shared/nm-utils/nm-shared-utils.h +++ b/shared/nm-utils/nm-shared-utils.h @@ -334,7 +334,8 @@ int nm_utils_dbus_path_cmp (const char *dbus_path_a, const char *dbus_path_b); typedef enum { NM_UTILS_STRSPLIT_SET_FLAGS_NONE = 0, - NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING = (1u << 0), + NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY = (1u << 0), + NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING = (1u << 1), } NMUtilsStrsplitSetFlags; const char **nm_utils_strsplit_set_full (const char *str, @@ -342,6 +343,17 @@ const char **nm_utils_strsplit_set_full (const char *str, NMUtilsStrsplitSetFlags flags); static inline const char ** +nm_utils_strsplit_set_with_empty (const char *str, + const char *delimiters) +{ + /* this returns the same result as g_strsplit_set(str, delimiters, -1), except + * it does not deep-clone the strv array. + * Also, for @str == "", this returns %NULL while g_strsplit_set() would return + * an empty strv array. 
*/ + return nm_utils_strsplit_set_full (str, delimiters, NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY); +} + +static inline const char ** nm_utils_strsplit_set (const char *str, const char *delimiters) { |