diff options
author | Christian Persch <chpe@gnome.org> | 2012-06-08 00:49:00 +0200 |
---|---|---|
committer | Christian Persch <chpe@gnome.org> | 2012-07-02 16:08:11 +0200 |
commit | d18c3010156bdccebdf18f09aaaac334c0b5d391 (patch) | |
tree | 797ed81baa74362892ff9dae20b7b1f95fa920f5 | |
parent | 7483315f83cac1f54fd72c331e6eff0781b8560f (diff) | |
download | glib-d18c3010156bdccebdf18f09aaaac334c0b5d391.tar.gz |
regex: Add g_match_info_get_mark
Since PCRE 8.03, PCRE supports backtracking control verbs with a name argument.
g_match_info_get_mark() will return the argument of the last encountered verb
in the whole matching process for failed or partial matches, and only on the
matching path for successful matches.
-rw-r--r-- | docs/reference/glib/glib-sections.txt | 1 | ||||
-rw-r--r-- | glib/gregex.c | 40 | ||||
-rw-r--r-- | glib/gregex.h | 2 | ||||
-rw-r--r-- | glib/tests/regex.c | 50 |
4 files changed, 91 insertions, 2 deletions
diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt index d2e985e68..02652a7b9 100644 --- a/docs/reference/glib/glib-sections.txt +++ b/docs/reference/glib/glib-sections.txt @@ -1026,6 +1026,7 @@ g_regex_check_replacement GMatchInfo g_match_info_get_regex g_match_info_get_string +g_match_info_get_mark g_match_info_ref g_match_info_unref g_match_info_free diff --git a/glib/gregex.c b/glib/gregex.c index 839b1e6e5..e69b2c0f5 100644 --- a/glib/gregex.c +++ b/glib/gregex.c @@ -214,6 +214,8 @@ struct _GMatchInfo gint n_workspace; /* number of workspace elements */ const gchar *string; /* string passed to the match function */ gssize string_len; /* length of string */ + /* const */ guchar *mark; /* MARK when using backtracking control */ + pcre_extra extra; /* pcre_extra data */ }; struct _GRegex @@ -592,6 +594,20 @@ match_info_new (const GRegex *regex, match_info->offsets[0] = -1; match_info->offsets[1] = -1; + if (!is_dfa) + { + /* We need a pcre_extra to store a pointer to GMatchInfo::mark + * where pcre_exec will store the MARK. + * Since pcre_exec does not modify the extra data otherwise, + * it should be safe to do a shallow copy here. + */ + if (regex->extra) + match_info->extra = *regex->extra; + + match_info->extra.flags |= PCRE_EXTRA_MARK; + match_info->extra.mark = &match_info->mark; + } + return match_info; } @@ -634,6 +650,27 @@ g_match_info_get_string (const GMatchInfo *match_info) } /** + * g_match_info_get_mark: + * @match_info: a #GMatchInfo structure + * + * When the pattern contains backtracking control verbs, and there is + * a match, returns the argument of the verb last encountered on the + * matching path. If there is a partial match, or no match, returns + * the argument of the last verb encountered in the whole matching + * process. Otherwise, %NULL is returned. 
+ * + * Returns: (transfer none): the mark, or %NULL + * + * Since: 2.34 + */ +const gchar * +g_match_info_get_mark (const GMatchInfo *match_info) +{ + g_return_val_if_fail (match_info != NULL, NULL); + return (const gchar *) match_info->mark; +} + +/** * g_match_info_ref: * @match_info: a #GMatchInfo * @@ -729,7 +766,7 @@ g_match_info_next (GMatchInfo *match_info, } match_info->matches = pcre_exec (match_info->regex->pcre_re, - match_info->regex->extra, + &match_info->extra, match_info->string, match_info->string_len, match_info->pos, @@ -1223,7 +1260,6 @@ g_match_info_fetch_all (const GMatchInfo *match_info) return result; } - /* GRegex */ GQuark diff --git a/glib/gregex.h b/glib/gregex.h index 29e5c6af4..694440603 100644 --- a/glib/gregex.h +++ b/glib/gregex.h @@ -527,6 +527,8 @@ gboolean g_regex_check_replacement (const gchar *replacement, /* Match info */ GRegex *g_match_info_get_regex (const GMatchInfo *match_info); const gchar *g_match_info_get_string (const GMatchInfo *match_info); +const gchar *g_match_info_get_mark (const GMatchInfo *match_info); + GMatchInfo *g_match_info_ref (GMatchInfo *match_info); void g_match_info_unref (GMatchInfo *match_info); diff --git a/glib/tests/regex.c b/glib/tests/regex.c index 72a0155f1..ed5ab8060 100644 --- a/glib/tests/regex.c +++ b/glib/tests/regex.c @@ -2048,6 +2048,48 @@ test_explicit_crlf (void) g_regex_unref (regex); } + +typedef struct { + const gchar *pattern; + const gchar *string; + const gchar *mark; + gboolean expected; +} TestMarkData; + +static void +test_mark (gconstpointer d) +{ + const TestMarkData *data = d; + GRegex *regex; + GMatchInfo *info; + gboolean match; + GError *error = NULL; + + regex = g_regex_new (data->pattern, 0, 0, &error); + g_assert_no_error (error); + + match = g_regex_match_full (regex, data->string, -1, 0, 0, &info, NULL); + g_assert_cmpint (match, ==, data->expected); + g_assert_cmpstr (g_match_info_get_mark (info), ==, data->mark); + + g_match_info_free (info); + g_regex_unref 
(regex); +} + +#define TEST_MARK(_pattern, _string, _expected, _mark) \ +{ \ + TestMarkData *data; \ + gchar *path; \ + data = g_new0 (TestMarkData, 1); \ + data->pattern = _pattern; \ + data->string = _string; \ + data->mark = _mark; \ + data->expected = _expected; \ + path = g_strdup_printf ("/regex/mark/%d", ++total); \ + g_test_add_data_func (path, data, test_mark); \ + g_free (path); \ +} + int main (int argc, char *argv[]) { @@ -2703,5 +2745,13 @@ main (int argc, char *argv[]) TEST_MATCH_NOTEMPTY("a?b?", "xyz", FALSE); TEST_MATCH_NOTEMPTY_ATSTART("a?b?", "xyz", TRUE); + /* MARK */ + TEST_MARK("^(A(*PRUNE:A)B|C(*PRUNE:B)D)", "AC", FALSE, "A"); + TEST_MARK("^(A(*PRUNE:A)B|C(*PRUNE:B)D)", "CB", FALSE, "B"); + TEST_MARK("(*MARK:A)(*SKIP:B)(C|X)", "C", TRUE, "A"); + TEST_MARK("(*MARK:A)(*SKIP:B)(C|X)", "D", FALSE, "A"); + TEST_MARK("X(*MARK:A)Y|X(*MARK:B)Z", "XY", TRUE, "A"); + TEST_MARK("X(*MARK:A)Y|X(*MARK:B)Z", "XZ", TRUE, "B"); + return g_test_run (); } |