summaryrefslogtreecommitdiff
path: root/lib/regex-quote.c
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2011-03-08 10:09:47 +0100
committerBruno Haible <bruno@clisp.org>2011-03-08 10:09:47 +0100
commitfbfced515f325dd000c80fa35ade86e567d0ea3c (patch)
tree15edd13725f7da5660cd80cd1d968df9a7d24569 /lib/regex-quote.c
parent6ff7b70e24f0e84e9f65ef6d021ff239cad0b2b4 (diff)
downloadgnulib-fbfced515f325dd000c80fa35ade86e567d0ea3c.tar.gz
regex-quote: New API.
* lib/regex-quote.h: Include <stdbool.h>. (struct regex_quote_spec): New type. (regex_quote_spec_posix, regex_quote_spec_gnu, regex_quote_spec_pcre): New declarations. (regex_quote_length, regex_quote_copy, regex_quote): Take a 'const struct regex_quote_spec *' argument. * lib/regex-quote.c (RE_*, PCRE_*): New macros. (pcre_special): New constant. (regex_quote_spec_posix, regex_quote_spec_gnu, regex_quote_spec_pcre): New functions. (regex_quote_length, regex_quote_copy, regex_quote): Take a 'const struct regex_quote_spec *' argument. * modules/regex-quote (Depends-on): Add stdbool. * tests/test-regex-quote.c (check): Update for new API. Add test for anchored results. * NEWS: Mention the API change. Reported by Reuben Thomas and Eric Blake.
Diffstat (limited to 'lib/regex-quote.c')
-rw-r--r--lib/regex-quote.c182
1 files changed, 156 insertions, 26 deletions
diff --git a/lib/regex-quote.c b/lib/regex-quote.c
index 361cff0770..8b4cdb78be 100644
--- a/lib/regex-quote.c
+++ b/lib/regex-quote.c
@@ -31,56 +31,186 @@ static const char bre_special[] = "$^.*[]\\";
/* Characters that are special in an ERE. */
static const char ere_special[] = "$^.*[]\\+?{}()|";
+struct regex_quote_spec
+regex_quote_spec_posix (int cflags, bool anchored)
+{ /* Builds the quoting specification for POSIX regular expressions.
+     CFLAGS: nonzero selects the ERE special characters (ere_special),
+     zero the BRE ones (bre_special).
+     ANCHORED: copied unchanged into the result's 'anchored' field.
+     The result is returned by value.  */
+ struct regex_quote_spec result;
+
+ strcpy (result.special, cflags != 0 ? ere_special : bre_special); /* assumes
+    result.special has room for the longer set plus its NUL -- TODO confirm
+    against the declaration in regex-quote.h */
+ result.multibyte = true; /* the multibyte flag is always set here */
+ result.anchored = anchored;
+
+ return result;
+}
+
+/* Syntax bit values, defined in GNU <regex.h>. We don't include it here,
+ otherwise this module would need to depend on gnulib module 'regex'. */
+#define RE_BK_PLUS_QM 0x00000002
+#define RE_INTERVALS 0x00000200
+#define RE_LIMITED_OPS 0x00000400
+#define RE_NEWLINE_ALT 0x00000800
+#define RE_NO_BK_BRACES 0x00001000
+#define RE_NO_BK_PARENS 0x00002000
+#define RE_NO_BK_VBAR 0x00008000
+
+struct regex_quote_spec
+regex_quote_spec_gnu (unsigned long /*reg_syntax_t*/ syntax, bool anchored)
+{ /* Builds the quoting specification for a GNU regex syntax.
+     SYNTAX: tested against the RE_* bits defined above to decide which
+     characters are added after the BRE special characters.
+     ANCHORED: copied unchanged into the result's 'anchored' field.
+     The result is returned by value.  */
+ struct regex_quote_spec result;
+ char *p;
+
+ p = result.special;
+ memcpy (p, bre_special, sizeof (bre_special) - 1); /* all of bre_special
+    except its terminating NUL */
+ p += sizeof (bre_special) - 1;
+ if ((syntax & RE_LIMITED_OPS) == 0 && (syntax & RE_BK_PLUS_QM) == 0) /* both
+    bits clear: '+' and '?' are listed as special */
+ {
+ *p++ = '+';
+ *p++ = '?';
+ }
+ if ((syntax & RE_INTERVALS) != 0 && (syntax & RE_NO_BK_BRACES) != 0) /* both
+    bits set: '{' and '}' are listed as special */
+ {
+ *p++ = '{';
+ *p++ = '}';
+ }
+ if ((syntax & RE_NO_BK_PARENS) != 0) /* bit set: '(' and ')' are listed */
+ {
+ *p++ = '(';
+ *p++ = ')';
+ }
+ if ((syntax & RE_LIMITED_OPS) == 0 && (syntax & RE_NO_BK_VBAR) != 0) /* '|' is
+    listed only when RE_LIMITED_OPS is clear and RE_NO_BK_VBAR is set */
+ *p++ = '|';
+ if ((syntax & RE_NEWLINE_ALT) != 0) /* bit set: newline is listed */
+ *p++ = '\n';
+ *p = '\0'; /* terminate the list so it can be searched with strchr */
+
+ result.multibyte = true; /* the multibyte flag is always set here */
+ result.anchored = anchored;
+
+ return result;
+}
+
+/* Characters that are special in a PCRE. */
+static const char pcre_special[] = "$^.*[]\\+?{}()|";
+
+/* Options bit values, defined in <pcre.h>. We don't include it here, because
+ it is not a standard header. */
+#define PCRE_ANCHORED 0x00000010
+#define PCRE_EXTENDED 0x00000008
+
+/* Returns the quoting specification for PCRE patterns.
+   OPTIONS is tested for PCRE_EXTENDED (whitespace characters and '#' are
+   then added to the special characters) and for PCRE_ANCHORED (the result
+   is then never marked as anchored).
+   ANCHORED is stored in the result unless PCRE_ANCHORED is set.
+   The result is returned by value.  */
+struct regex_quote_spec
+regex_quote_spec_pcre (int options, bool anchored)
+{
+ struct regex_quote_spec result;
+ char *p;
+
+ p = result.special;
+ /* The source must be pcre_special itself: the byte count below is the
+    length of pcre_special, and bre_special is a shorter array.  */
+ memcpy (p, pcre_special, sizeof (pcre_special) - 1);
+ p += sizeof (pcre_special) - 1;
+ if (options & PCRE_EXTENDED)
+ {
+ *p++ = ' ';
+ *p++ = '\t';
+ *p++ = '\n';
+ *p++ = '\v';
+ *p++ = '\f';
+ *p++ = '\r';
+ *p++ = '#';
+ }
+ *p = '\0';
+
+ /* PCRE regular expressions consist of UTF-8 characters if options contains
+ PCRE_UTF8 and of single bytes otherwise. */
+ result.multibyte = false;
+ /* If options contains PCRE_ANCHORED, the anchoring is implicit. */
+ result.anchored = (options & PCRE_ANCHORED ? 0 : anchored);
+
+ return result;
+}
+
size_t
-regex_quote_length (const char *string, int cflags)
+regex_quote_length (const char *string, const struct regex_quote_spec *spec) /*
+   Returns the number of bytes of the quoted form of STRING under SPEC:
+   2 for the anchors when spec->anchored is set, one extra byte for every
+   character found in spec->special, plus the bytes of STRING itself.
+   No terminating NUL is counted.  */
{
- const char *special = (cflags != 0 ? ere_special : bre_special);
+ const char *special = spec->special;
size_t length;
- mbui_iterator_t iter;
length = 0;
- for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
+ if (spec->anchored)
+ length += 2; /* for '^' at the beginning and '$' at the end */
+ if (spec->multibyte) /* walk STRING with the mbui iterator */
+ {
+ mbui_iterator_t iter;
+
+ for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
+ {
+ /* We know that special contains only ASCII characters, so only
+    characters of length 1 are looked up in it.  */
+ if (mb_len (mbui_cur (iter)) == 1
+ && strchr (special, * mbui_cur_ptr (iter)))
+ length += 1; /* room for the escaping byte */
+ length += mb_len (mbui_cur (iter));
+ }
+ }
+ else
{
- /* We know that special contains only ASCII characters. */
- if (mb_len (mbui_cur (iter)) == 1
- && strchr (special, * mbui_cur_ptr (iter)))
- length += 1;
- length += mb_len (mbui_cur (iter));
+ const char *iter;
+
+ for (iter = string; *iter != '\0'; iter++) /* one byte at a time */
+ {
+ if (strchr (special, *iter))
+ length += 1; /* room for the escaping byte */
+ length += 1;
+ }
}
+
return length;
}
-/* Copies the quoted string to p and returns the incremented p.
- There must be room for regex_quote_length (string, cflags) + 1 bytes at p.
- */
char *
-regex_quote_copy (char *p, const char *string, int cflags)
+regex_quote_copy (char *p, const char *string, const struct regex_quote_spec *spec)
{
- const char *special = (cflags != 0 ? ere_special : bre_special);
- mbui_iterator_t iter;
+ const char *special = spec->special;
- for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
+ if (spec->anchored)
+ *p++ = '^';
+ if (spec->multibyte)
{
- /* We know that special contains only ASCII characters. */
- if (mb_len (mbui_cur (iter)) == 1
- && strchr (special, * mbui_cur_ptr (iter)))
- *p++ = '\\';
- memcpy (p, mbui_cur_ptr (iter), mb_len (mbui_cur (iter)));
- p += mb_len (mbui_cur (iter));
+ mbui_iterator_t iter;
+
+ for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
+ {
+ /* We know that special contains only ASCII characters. */
+ if (mb_len (mbui_cur (iter)) == 1
+ && strchr (special, * mbui_cur_ptr (iter)))
+ *p++ = '\\';
+ memcpy (p, mbui_cur_ptr (iter), mb_len (mbui_cur (iter)));
+ p += mb_len (mbui_cur (iter));
+ }
}
+ else
+ {
+ const char *iter;
+
+ for (iter = string; *iter != '\0'; iter++)
+ {
+ if (strchr (special, *iter))
+ *p++ = '\\';
+ *p++ = *iter++;
+ }
+ }
+ if (spec->anchored)
+ *p++ = '$';
+
return p;
}
-/* Returns the freshly allocated quoted string. */
char *
-regex_quote (const char *string, int cflags)
+regex_quote (const char *string, const struct regex_quote_spec *spec) /*
+   Returns the quoted form of STRING under SPEC as a NUL-terminated string
+   in a buffer obtained from XNMALLOC.
+   NOTE(review): presumably the caller frees the result and XNMALLOC does
+   not return on allocation failure -- confirm against xalloc.h.  */
{
- size_t length = regex_quote_length (string, cflags);
+ size_t length = regex_quote_length (string, spec); /* the extra byte for the
+    terminating NUL is added in the allocation below */
char *result = XNMALLOC (length + 1, char);
char *p;
p = result;
- p = regex_quote_copy (p, string, cflags);
+ p = regex_quote_copy (p, string, spec); /* p is now just past the copy */
*p = '\0';
return result;
}