summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Christian Persch <chpe@gnome.org> 2012-06-08 00:49:00 +0200
committer Christian Persch <chpe@gnome.org> 2012-07-02 16:08:11 +0200
commit d18c3010156bdccebdf18f09aaaac334c0b5d391 (patch)
tree 797ed81baa74362892ff9dae20b7b1f95fa920f5
parent 7483315f83cac1f54fd72c331e6eff0781b8560f (diff)
download glib-d18c3010156bdccebdf18f09aaaac334c0b5d391.tar.gz
regex: Add g_match_info_get_mark
Since PCRE 8.03, PCRE supports backtracking control verbs with a name argument. For failed or partial matches, g_match_info_get_mark() returns the argument of the last verb encountered in the whole matching process; for successful matches, it returns the argument of the last verb encountered on the matching path only.
-rw-r--r-- docs/reference/glib/glib-sections.txt 1
-rw-r--r-- glib/gregex.c 40
-rw-r--r-- glib/gregex.h 2
-rw-r--r-- glib/tests/regex.c 50
4 files changed, 91 insertions, 2 deletions
diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt
index d2e985e68..02652a7b9 100644
--- a/docs/reference/glib/glib-sections.txt
+++ b/docs/reference/glib/glib-sections.txt
@@ -1026,6 +1026,7 @@ g_regex_check_replacement
GMatchInfo
g_match_info_get_regex
g_match_info_get_string
+g_match_info_get_mark
g_match_info_ref
g_match_info_unref
g_match_info_free
diff --git a/glib/gregex.c b/glib/gregex.c
index 839b1e6e5..e69b2c0f5 100644
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -214,6 +214,8 @@ struct _GMatchInfo
gint n_workspace; /* number of workspace elements */
const gchar *string; /* string passed to the match function */
gssize string_len; /* length of string */
+ /* const */ guchar *mark; /* MARK when using backtracking control */
+ pcre_extra extra; /* pcre_extra data */
};
struct _GRegex
@@ -592,6 +594,20 @@ match_info_new (const GRegex *regex,
match_info->offsets[0] = -1;
match_info->offsets[1] = -1;
+ if (!is_dfa)
+ {
+ /* We need a pcre_extra to store a pointer to GMatchInfo::mark
+ * where pcre_exec will store the MARK.
+ * Since pcre_exec does not modify the extra data otherwise,
+ * it should be safe to do a shallow copy here.
+ */
+ if (regex->extra)
+ match_info->extra = *regex->extra;
+
+ match_info->extra.flags |= PCRE_EXTRA_MARK;
+ match_info->extra.mark = &match_info->mark;
+ }
+
return match_info;
}
@@ -634,6 +650,27 @@ g_match_info_get_string (const GMatchInfo *match_info)
}
/**
+ * g_match_info_get_mark:
+ * @match_info: a #GMatchInfo structure
+ *
+ * When the pattern contains backtracking control verbs, and there is
+ * a match, returns the argument of the verb last encountered on the
+ * matching path. If there is a partial match, or no match, returns
+ * the argument of the last verb encountered in the whole matching
+ * process. Otherwise, %NULL is returned.
+ *
+ * Returns: (transfer none): the mark, or %NULL
+ *
+ * Since: 2.34
+ */
+const gchar *
+g_match_info_get_mark (const GMatchInfo *match_info)
+{
+ g_return_val_if_fail (match_info != NULL, NULL);
+ return (const gchar *) match_info->mark;
+}
+
+/**
* g_match_info_ref:
* @match_info: a #GMatchInfo
*
@@ -729,7 +766,7 @@ g_match_info_next (GMatchInfo *match_info,
}
match_info->matches = pcre_exec (match_info->regex->pcre_re,
- match_info->regex->extra,
+ &match_info->extra,
match_info->string,
match_info->string_len,
match_info->pos,
@@ -1223,7 +1260,6 @@ g_match_info_fetch_all (const GMatchInfo *match_info)
return result;
}
-
/* GRegex */
GQuark
diff --git a/glib/gregex.h b/glib/gregex.h
index 29e5c6af4..694440603 100644
--- a/glib/gregex.h
+++ b/glib/gregex.h
@@ -527,6 +527,8 @@ gboolean g_regex_check_replacement (const gchar *replacement,
/* Match info */
GRegex *g_match_info_get_regex (const GMatchInfo *match_info);
const gchar *g_match_info_get_string (const GMatchInfo *match_info);
+const gchar *g_match_info_get_mark (const GMatchInfo *match_info);
+
GMatchInfo *g_match_info_ref (GMatchInfo *match_info);
void g_match_info_unref (GMatchInfo *match_info);
diff --git a/glib/tests/regex.c b/glib/tests/regex.c
index 72a0155f1..ed5ab8060 100644
--- a/glib/tests/regex.c
+++ b/glib/tests/regex.c
@@ -2048,6 +2048,48 @@ test_explicit_crlf (void)
g_regex_unref (regex);
}
+
+typedef struct {
+ const gchar *pattern;
+ const gchar *string;
+ const gchar *mark;
+ gboolean expected;
+} TestMarkData;
+
+static void
+test_mark (gconstpointer d)
+{
+ const TestMarkData *data = d;
+ GRegex *regex;
+ GMatchInfo *info;
+ gboolean match;
+ GError *error = NULL;
+
+ regex = g_regex_new (data->pattern, 0, 0, &error);
+ g_assert_no_error (error);
+
+ match = g_regex_match_full (regex, data->string, -1, 0, 0, &info, NULL);
+ g_assert_cmpint (match, ==, data->expected);
+ g_assert_cmpstr (g_match_info_get_mark (info), ==, data->mark);
+
+ g_match_info_free (info);
+ g_regex_unref (regex);
+}
+
+#define TEST_MARK(_pattern, _string, _expected, _mark) \
+{ \
+ TestMarkData *data; \
+ gchar *path; \
+ data = g_new0 (TestMarkData, 1); \
+ data->pattern = _pattern; \
+ data->string = _string; \
+ data->mark = _mark; \
+ data->expected = _expected; \
+ path = g_strdup_printf ("/regex/mark/%d", ++total); \
+ g_test_add_data_func (path, data, test_mark); \
+ g_free (path); \
+}
+
int
main (int argc, char *argv[])
{
@@ -2703,5 +2745,13 @@ main (int argc, char *argv[])
TEST_MATCH_NOTEMPTY("a?b?", "xyz", FALSE);
TEST_MATCH_NOTEMPTY_ATSTART("a?b?", "xyz", TRUE);
+ /* MARK */
+ TEST_MARK("^(A(*PRUNE:A)B|C(*PRUNE:B)D)", "AC", FALSE, "A");
+ TEST_MARK("^(A(*PRUNE:A)B|C(*PRUNE:B)D)", "CB", FALSE, "B");
+ TEST_MARK("(*MARK:A)(*SKIP:B)(C|X)", "C", TRUE, "A");
+ TEST_MARK("(*MARK:A)(*SKIP:B)(C|X)", "D", FALSE, "A");
+ TEST_MARK("X(*MARK:A)Y|X(*MARK:B)Z", "XY", TRUE, "A");
+ TEST_MARK("X(*MARK:A)Y|X(*MARK:B)Z", "XZ", TRUE, "B");
+
return g_test_run ();
}