summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Christian Hergert <chergert@redhat.com>, 2022-09-29 13:45:34 -0700
committer: Christian Hergert <chergert@redhat.com>, 2022-09-29 13:45:34 -0700
commit: 9f92328a1d3a12bd67e7b1f7100aa66a6b50bb57 (patch)
tree: a1a5fd44fb72e7d245b3b4cc3564a56a060320de
parent: 87e62bad1092ba795d8995ac27222e320fb1a14d (diff)
download: gtksourceview-9f92328a1d3a12bd67e7b1f7100aa66a6b50bb57.tar.gz
testsuite/regex: walk subject as utf8 unless G_REGEX_RAW
This makes sure that subjects which are expected to be UTF-8 are walked character by character, so that a start position never splits a multi-byte character. However, if G_REGEX_RAW is set, the subject is walked byte by byte.
-rw-r--r--testsuite/test-regex.c34
1 files changed, 26 insertions, 8 deletions
diff --git a/testsuite/test-regex.c b/testsuite/test-regex.c
index 1306f1c5..935f9ab8 100644
--- a/testsuite/test-regex.c
+++ b/testsuite/test-regex.c
@@ -128,15 +128,32 @@ compare_impl_regex_to_g_regex (const char *subject,
g_clear_pointer (&mi1, g_match_info_free);
g_clear_pointer (&mi2, impl_match_info_free);
- for (int i = 0; i <= subject_len; i++)
+ if (compile_flags & G_REGEX_RAW)
{
- r1 = g_regex_match_full (reg1, subject, subject_len, i, match_flags, &mi1, &err1);
- r2 = impl_regex_match_full (reg2, subject, subject_len, i, match_flags, &mi2, &err2);
- g_assert_cmpint (r1, ==, r2);
- g_assert_true (err1 == NULL || err2 != NULL);
- assert_iterations (mi1, mi2);
- g_clear_pointer (&mi1, g_match_info_free);
- g_clear_pointer (&mi2, impl_match_info_free);
+ for (int i = 0; i <= subject_len; i++)
+ {
+ r1 = g_regex_match_full (reg1, subject, subject_len, i, match_flags, &mi1, &err1);
+ r2 = impl_regex_match_full (reg2, subject, subject_len, i, match_flags, &mi2, &err2);
+ g_assert_cmpint (r1, ==, r2);
+ g_assert_true (err1 == NULL || err2 != NULL);
+ assert_iterations (mi1, mi2);
+ g_clear_pointer (&mi1, g_match_info_free);
+ g_clear_pointer (&mi2, impl_match_info_free);
+ }
+ }
+ else
+ {
+ for (const char *iter = subject; *iter; iter = g_utf8_next_char (iter))
+ {
+ gsize i = iter - subject;
+ r1 = g_regex_match_full (reg1, subject, subject_len, i, match_flags, &mi1, &err1);
+ r2 = impl_regex_match_full (reg2, subject, subject_len, i, match_flags, &mi2, &err2);
+ g_assert_cmpint (r1, ==, r2);
+ g_assert_true (err1 == NULL || err2 != NULL);
+ assert_iterations (mi1, mi2);
+ g_clear_pointer (&mi1, g_match_info_free);
+ g_clear_pointer (&mi2, impl_match_info_free);
+ }
}
g_clear_pointer (&reg1, g_regex_unref);
@@ -195,6 +212,7 @@ test_compare (void)
compare_impl_regex_to_g_regex ("hello\nworld\n", "(.*\\n)*", compile, match);
compare_impl_regex_to_g_regex ("&aa", "\\baa\\b", compile, match);
+ compare_impl_regex_to_g_regex ("\342\200\223aa", "\\baa\\b", compile, match);
/* this can be an invalid UTF-8 string if substring-ed, make glib think it's a raw string */
compare_impl_regex_to_g_regex ("\342\200\223aa", "\\baa\\b", compile | G_REGEX_RAW, match);