summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Persch <chpe@gnome.org>2012-06-08 00:56:44 +0200
committerChristian Persch <chpe@gnome.org>2012-07-02 16:09:30 +0200
commita6e3eb6eced6b4f1d816a0ea73adb5081145730a (patch)
treed2bc878405127994b2fd4f14973a6eace176a6fe
parentd18c3010156bdccebdf18f09aaaac334c0b5d391 (diff)
downloadglib-wip/pcre-mark.tar.gz
regex: Add NO_START_OPTIMIZE compile and match flagswip/pcre-mark
PCRE_NO_START_OPTIMIZE exists since PCRE 7.9, but was not usefully before since it only affects callout (which GRegex doesn't support) and backtracking control verbs which the last commit makes use of.
-rw-r--r--glib/gregex.c69
-rw-r--r--glib/gregex.h39
-rw-r--r--glib/tests/regex.c2
3 files changed, 61 insertions, 49 deletions
diff --git a/glib/gregex.c b/glib/gregex.c
index e69b2c0f5..282700a53 100644
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -128,7 +128,8 @@
G_REGEX_NEWLINE_CRLF | \
G_REGEX_NEWLINE_ANYCRLF | \
G_REGEX_BSR_ANYCRLF | \
- G_REGEX_JAVASCRIPT_COMPAT)
+ G_REGEX_JAVASCRIPT_COMPAT | \
+ G_REGEX_NO_START_OPTIMIZE)
/* Mask of all GRegexCompileFlags values that are (not) passed trough to PCRE */
#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
@@ -136,20 +137,21 @@
G_REGEX_OPTIMIZE)
/* Mask of all the possible values for GRegexMatchFlags. */
-#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED | \
- G_REGEX_MATCH_NOTBOL | \
- G_REGEX_MATCH_NOTEOL | \
- G_REGEX_MATCH_NOTEMPTY | \
- G_REGEX_MATCH_PARTIAL | \
- G_REGEX_MATCH_NEWLINE_CR | \
- G_REGEX_MATCH_NEWLINE_LF | \
- G_REGEX_MATCH_NEWLINE_CRLF | \
- G_REGEX_MATCH_NEWLINE_ANY | \
- G_REGEX_MATCH_NEWLINE_ANYCRLF | \
- G_REGEX_MATCH_BSR_ANYCRLF | \
- G_REGEX_MATCH_BSR_ANY | \
- G_REGEX_MATCH_PARTIAL_SOFT | \
- G_REGEX_MATCH_PARTIAL_HARD | \
+#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED | \
+ G_REGEX_MATCH_NOTBOL | \
+ G_REGEX_MATCH_NOTEOL | \
+ G_REGEX_MATCH_NOTEMPTY | \
+ G_REGEX_MATCH_PARTIAL | \
+ G_REGEX_MATCH_NEWLINE_CR | \
+ G_REGEX_MATCH_NEWLINE_LF | \
+ G_REGEX_MATCH_NEWLINE_CRLF | \
+ G_REGEX_MATCH_NEWLINE_ANY | \
+ G_REGEX_MATCH_NEWLINE_ANYCRLF | \
+ G_REGEX_MATCH_BSR_ANYCRLF | \
+ G_REGEX_MATCH_BSR_ANY | \
+ G_REGEX_MATCH_NO_START_OPTIMIZE | \
+ G_REGEX_MATCH_PARTIAL_SOFT | \
+ G_REGEX_MATCH_PARTIAL_HARD | \
G_REGEX_MATCH_NOTEMPTY_ATSTART)
/* we rely on these flags having the same values */
@@ -169,22 +171,24 @@ G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
-
-G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART);
+G_STATIC_ASSERT (G_REGEX_NO_START_OPTIMIZE == PCRE_NO_START_OPTIMIZE);
+
+G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
+G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
+G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
+G_STATIC_ASSERT (G_REGEX_MATCH_NO_START_OPTIMIZE == PCRE_NO_START_OPTIMIZE);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
+G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
+G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART);
/* These PCRE flags are unused or not exposed publically in GRegexFlags, so
* it should be ok to reuse them for different things.
@@ -659,6 +663,9 @@ g_match_info_get_string (const GMatchInfo *match_info)
* the argument of the last verb encountered in the whole matching
* process. Otherwise, $NULL is returned.
*
+ * See <ulink>man:pcrepattern<ulink> for more information on
+ * backtracking control verbs.
+ *
* Returns: (transfer none): the mark, or %NULL
*
* Since: 2.34
diff --git a/glib/gregex.h b/glib/gregex.h
index 694440603..0640aac4d 100644
--- a/glib/gregex.h
+++ b/glib/gregex.h
@@ -279,7 +279,8 @@ GQuark g_regex_error_quark (void);
* G_REGEX_BSR_ANYCRLF: Usually any newline character or character sequence
* is recognised. If this option is set, then "\R" only recognizes the newline
* characters '\r', '\n' and '\r\n'. Since: 2.34
- *
+ * @G_REGEX_NO_START_OPTIMIZE: Disable some optimizations that will cause incorrect
+ * results for g_match_info_get_mark() when using backtracking control verbs. Since: 2.34
*
* Flags specifying compile-time options.
*
@@ -306,7 +307,8 @@ typedef enum
G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22,
G_REGEX_BSR_ANYCRLF = 1 << 23,
- G_REGEX_JAVASCRIPT_COMPAT = 1 << 25
+ G_REGEX_JAVASCRIPT_COMPAT = 1 << 25,
+ G_REGEX_NO_START_OPTIMIZE = 1 << 26
} GRegexCompileFlags;
/**
@@ -372,6 +374,8 @@ typedef enum
* @G_REGEX_MATCH_NOTEMPTY_ATSTART: Like #G_REGEX_MATCH_NOTEMPTY, but only applied to
* the start of the matched string. For anchored
* patterns this can only happen for pattern containing "\K". Since: 2.34
+ * @G_REGEX_MATCH_NO_START_OPTIMIZE: Disable some optimizations that will cause incorrect
+ * results for g_match_info_get_mark() when using backtracking control verbs. Since: 2.34
*
* Flags specifying match-time options.
*
@@ -381,21 +385,22 @@ typedef enum
* adding a new flag. */
typedef enum
{
- G_REGEX_MATCH_ANCHORED = 1 << 4,
- G_REGEX_MATCH_NOTBOL = 1 << 7,
- G_REGEX_MATCH_NOTEOL = 1 << 8,
- G_REGEX_MATCH_NOTEMPTY = 1 << 10,
- G_REGEX_MATCH_PARTIAL = 1 << 15,
- G_REGEX_MATCH_NEWLINE_CR = 1 << 20,
- G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
- G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
- G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
- G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
- G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
- G_REGEX_MATCH_BSR_ANY = 1 << 24,
- G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL,
- G_REGEX_MATCH_PARTIAL_HARD = 1 << 27,
- G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28
+ G_REGEX_MATCH_ANCHORED = 1 << 4,
+ G_REGEX_MATCH_NOTBOL = 1 << 7,
+ G_REGEX_MATCH_NOTEOL = 1 << 8,
+ G_REGEX_MATCH_NOTEMPTY = 1 << 10,
+ G_REGEX_MATCH_PARTIAL = 1 << 15,
+ G_REGEX_MATCH_NEWLINE_CR = 1 << 20,
+ G_REGEX_MATCH_NEWLINE_LF = 1 << 21,
+ G_REGEX_MATCH_NEWLINE_CRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
+ G_REGEX_MATCH_NEWLINE_ANY = 1 << 22,
+ G_REGEX_MATCH_NEWLINE_ANYCRLF = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
+ G_REGEX_MATCH_BSR_ANYCRLF = 1 << 23,
+ G_REGEX_MATCH_BSR_ANY = 1 << 24,
+ G_REGEX_MATCH_NO_START_OPTIMIZE = 1 << 26,
+ G_REGEX_MATCH_PARTIAL_SOFT = G_REGEX_MATCH_PARTIAL,
+ G_REGEX_MATCH_PARTIAL_HARD = 1 << 27,
+ G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28
} GRegexMatchFlags;
/**
diff --git a/glib/tests/regex.c b/glib/tests/regex.c
index ed5ab8060..54cc50f20 100644
--- a/glib/tests/regex.c
+++ b/glib/tests/regex.c
@@ -2146,7 +2146,7 @@ main (int argc, char *argv[])
TEST_NEW_CHECK_FLAGS ("(*ANYCRLF)a", 0, 0, G_REGEX_NEWLINE_ANYCRLF, 0);
TEST_NEW_CHECK_FLAGS ("(*BSR_ANYCRLF)a", 0, 0, G_REGEX_BSR_ANYCRLF, 0);
TEST_NEW_CHECK_FLAGS ("(*BSR_UNICODE)a", 0, 0, 0 /* this is the default in GRegex */, 0);
- TEST_NEW_CHECK_FLAGS ("(*NO_START_OPT)a", 0, 0, 0 /* not exposed in GRegex */, 0);
+ TEST_NEW_CHECK_FLAGS ("(*NO_START_OPT)", 0, 0, G_REGEX_NO_START_OPTIMIZE, 0);
/* TEST_NEW_FAIL(pattern, compile_opts, expected_error) */
TEST_NEW_FAIL("(", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);