diff options
author | Christian Hergert <chergert@redhat.com> | 2022-09-29 13:45:34 -0700 |
---|---|---|
committer | Christian Hergert <chergert@redhat.com> | 2022-09-29 13:45:34 -0700 |
commit | 9f92328a1d3a12bd67e7b1f7100aa66a6b50bb57 (patch) | |
tree | a1a5fd44fb72e7d245b3b4cc3564a56a060320de | |
parent | 87e62bad1092ba795d8995ac27222e320fb1a14d (diff) | |
download | gtksourceview-9f92328a1d3a12bd67e7b1f7100aa66a6b50bb57.tar.gz |
testsuite/regex: walk subject as utf8 unless G_REGEX_RAW
This makes sure that subjects which are expected to be UTF-8 are walked
character by character, so that a match never starts in the middle of a
multi-byte character. However, if G_REGEX_RAW is set, walk byte by byte.
-rw-r--r-- | testsuite/test-regex.c | 34 |
1 files changed, 26 insertions, 8 deletions
diff --git a/testsuite/test-regex.c b/testsuite/test-regex.c index 1306f1c5..935f9ab8 100644 --- a/testsuite/test-regex.c +++ b/testsuite/test-regex.c @@ -128,15 +128,32 @@ compare_impl_regex_to_g_regex (const char *subject, g_clear_pointer (&mi1, g_match_info_free); g_clear_pointer (&mi2, impl_match_info_free); - for (int i = 0; i <= subject_len; i++) + if (compile_flags & G_REGEX_RAW) { - r1 = g_regex_match_full (reg1, subject, subject_len, i, match_flags, &mi1, &err1); - r2 = impl_regex_match_full (reg2, subject, subject_len, i, match_flags, &mi2, &err2); - g_assert_cmpint (r1, ==, r2); - g_assert_true (err1 == NULL || err2 != NULL); - assert_iterations (mi1, mi2); - g_clear_pointer (&mi1, g_match_info_free); - g_clear_pointer (&mi2, impl_match_info_free); + for (int i = 0; i <= subject_len; i++) + { + r1 = g_regex_match_full (reg1, subject, subject_len, i, match_flags, &mi1, &err1); + r2 = impl_regex_match_full (reg2, subject, subject_len, i, match_flags, &mi2, &err2); + g_assert_cmpint (r1, ==, r2); + g_assert_true (err1 == NULL || err2 != NULL); + assert_iterations (mi1, mi2); + g_clear_pointer (&mi1, g_match_info_free); + g_clear_pointer (&mi2, impl_match_info_free); + } + } + else + { + for (const char *iter = subject; *iter; iter = g_utf8_next_char (iter)) + { + gsize i = iter - subject; + r1 = g_regex_match_full (reg1, subject, subject_len, i, match_flags, &mi1, &err1); + r2 = impl_regex_match_full (reg2, subject, subject_len, i, match_flags, &mi2, &err2); + g_assert_cmpint (r1, ==, r2); + g_assert_true (err1 == NULL || err2 != NULL); + assert_iterations (mi1, mi2); + g_clear_pointer (&mi1, g_match_info_free); + g_clear_pointer (&mi2, impl_match_info_free); + } } g_clear_pointer (&reg1, g_regex_unref); @@ -195,6 +212,7 @@ test_compare (void) compare_impl_regex_to_g_regex ("hello\nworld\n", "(.*\\n)*", compile, match); compare_impl_regex_to_g_regex ("&aa", "\\baa\\b", compile, match); + compare_impl_regex_to_g_regex ("\342\200\223aa", "\\baa\\b", compile, 
match); /* this can be a invalid UTF-8 string if substring-ed, make glib think it's a raw string */ compare_impl_regex_to_g_regex ("\342\200\223aa", "\\baa\\b", compile | G_REGEX_RAW, match); |