summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Persch <chpe@gnome.org>2012-06-07 23:24:07 +0200
committerChristian Persch <chpe@gnome.org>2012-07-02 15:59:39 +0200
commite99e34f65f6cf66feaccde29e480965d525586ae (patch)
treee925f02e717fcb627f0d9dd1701cf6e6565e6ec4
parent1171215014bb9406ff8ae1ea91b1c251b4e7d71b (diff)
downloadglib-e99e34f65f6cf66feaccde29e480965d525586ae.tar.gz
regex: Add NOTEMPTY_ATSTART match option
Since version 8.00, PCRE supports a variant of PCRE_NOTEMPTY that works similarly, except that it only applies to the start of the matched string and permits empty matches further in.
-rw-r--r--glib/gregex.c32
-rw-r--r--glib/gregex.h32
-rw-r--r--glib/tests/regex.c15
3 files changed, 48 insertions, 31 deletions
diff --git a/glib/gregex.c b/glib/gregex.c
index 184fc200e..07f8e52c5 100644
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -148,7 +148,8 @@
G_REGEX_MATCH_BSR_ANYCRLF | \
G_REGEX_MATCH_BSR_ANY | \
G_REGEX_MATCH_PARTIAL_SOFT | \
- G_REGEX_MATCH_PARTIAL_HARD)
+ G_REGEX_MATCH_PARTIAL_HARD | \
+ G_REGEX_MATCH_NOTEMPTY_ATSTART)
/* we rely on these flags having the same values */
G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS);
@@ -167,20 +168,21 @@ G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
-G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
+G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART);
/* These PCRE flags are unused or not exposed publically in GRegexFlags, so
* it should be ok to reuse them for different things.
diff --git a/glib/gregex.h b/glib/gregex.h
index 6550fb689..3ac877e0a 100644
--- a/glib/gregex.h
+++ b/glib/gregex.h
@@ -366,6 +366,9 @@ typedef enum
* to #G_REGEX_MATCH_PARTIAL_SOFT, this stops matching as soon as a partial match
* is found, without continuing to search for a possible complete match. See
* g_match_info_is_partial_match() for more information. Since: 2.34
+ * @G_REGEX_MATCH_NOTEMPTY_ATSTART: Like #G_REGEX_MATCH_NOTEMPTY, but only applied to
+ * the start of the matched string. For anchored
+ * patterns this can only happen for patterns containing "\K". Since: 2.34
*
* Flags specifying match-time options.
*
@@ -375,20 +378,21 @@ typedef enum
* adding a new flag. */
typedef enum
{
- G_REGEX_MATCH_ANCHORED = 1 << 4,
- G_REGEX_MATCH_NOTBOL = 1 << 7,
- G_REGEX_MATCH_NOTEOL = 1 << 8,
- G_REGEX_MATCH_NOTEMPTY = 1 << 10,
- G_REGEX_MATCH_PARTIAL = 1 << 15,
- G_REGEX_MATCH_NEWLINE_CR = 1 << 20,
- G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
- G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
- G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
- G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
- G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
- G_REGEX_MATCH_BSR_ANY = 1 << 24,
- G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL,
- G_REGEX_MATCH_PARTIAL_HARD = 1 << 27
+ G_REGEX_MATCH_ANCHORED = 1 << 4,
+ G_REGEX_MATCH_NOTBOL = 1 << 7,
+ G_REGEX_MATCH_NOTEOL = 1 << 8,
+ G_REGEX_MATCH_NOTEMPTY = 1 << 10,
+ G_REGEX_MATCH_PARTIAL = 1 << 15,
+ G_REGEX_MATCH_NEWLINE_CR = 1 << 20,
+ G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
+ G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
+ G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
+ G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
+ G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
+ G_REGEX_MATCH_BSR_ANY = 1 << 24,
+ G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL,
+ G_REGEX_MATCH_PARTIAL_HARD = 1 << 27,
+ G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28
} GRegexMatchFlags;
/**
diff --git a/glib/tests/regex.c b/glib/tests/regex.c
index 005f48ff7..b70119f25 100644
--- a/glib/tests/regex.c
+++ b/glib/tests/regex.c
@@ -160,7 +160,7 @@ test_match_simple (gconstpointer d)
g_assert_cmpint (match, ==, data->expected);
}
-#define TEST_MATCH_SIMPLE(_pattern, _string, _compile_opts, _match_opts, _expected) { \
+#define TEST_MATCH_SIMPLE_NAMED(_name, _pattern, _string, _compile_opts, _match_opts, _expected) { \
TestMatchData *data; \
gchar *path; \
data = g_new0 (TestMatchData, 1); \
@@ -169,11 +169,18 @@ test_match_simple (gconstpointer d)
data->compile_opts = _compile_opts; \
data->match_opts = _match_opts; \
data->expected = _expected; \
- path = g_strdup_printf ("/regex/match-simple/%d", ++total); \
+ path = g_strdup_printf ("/regex/match-%s/%d", _name, ++total); \
g_test_add_data_func (path, data, test_match_simple); \
g_free (path); \
}
+#define TEST_MATCH_SIMPLE(_pattern, _string, _compile_opts, _match_opts, _expected) \
+ TEST_MATCH_SIMPLE_NAMED("simple", _pattern, _string, _compile_opts, _match_opts, _expected)
+#define TEST_MATCH_NOTEMPTY(_pattern, _string, _expected) \
+ TEST_MATCH_SIMPLE_NAMED("notempty", _pattern, _string, 0, G_REGEX_MATCH_NOTEMPTY, _expected)
+#define TEST_MATCH_NOTEMPTY_ATSTART(_pattern, _string, _expected) \
+ TEST_MATCH_SIMPLE_NAMED("notempty-atstart", _pattern, _string, 0, G_REGEX_MATCH_NOTEMPTY_ATSTART, _expected)
+
static void
test_match (gconstpointer d)
{
@@ -2683,5 +2690,9 @@ main (int argc, char *argv[])
"<a><b>", 0, 6, "<a>", 0, 3);
TEST_MATCH_ALL3("a+", "aaa", -1, 0, "aaa", 0, 3, "aa", 0, 2, "a", 0, 1);
+ /* NOTEMPTY matching */
+ TEST_MATCH_NOTEMPTY("a?b?", "xyz", FALSE);
+ TEST_MATCH_NOTEMPTY_ATSTART("a?b?", "xyz", TRUE);
+
return g_test_run ();
}