/* Construct a regular expression from a literal string. Copyright (C) 1995, 2010-2023 Free Software Foundation, Inc. Written by Bruno Haible , 2010. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include /* Specification. */ #include "regex-quote.h" #include #include "mbuiter.h" #include "xalloc.h" /* Characters that are special in a BRE. */ static const char bre_special[] = "$^.*[\\"; /* Characters that are special in an ERE. */ static const char ere_special[] = "$^.*[\\+?{()|"; struct regex_quote_spec regex_quote_spec_posix (int cflags, bool anchored) { struct regex_quote_spec result; strcpy (result.special, cflags != 0 ? ere_special : bre_special); result.multibyte = true; result.anchored = anchored; return result; } /* Syntax bit values, defined in GNU . We don't include it here, otherwise this module would need to depend on gnulib module 'regex'. */ #define RE_BK_PLUS_QM 0x00000002 #define RE_INTERVALS 0x00000200 #define RE_LIMITED_OPS 0x00000400 #define RE_NEWLINE_ALT 0x00000800 #define RE_NO_BK_BRACES 0x00001000 #define RE_NO_BK_PARENS 0x00002000 #define RE_NO_BK_VBAR 0x00008000 struct regex_quote_spec regex_quote_spec_gnu (unsigned long /*reg_syntax_t*/ syntax, bool anchored) { struct regex_quote_spec result; char *p; p = result.special; memcpy (p, bre_special, sizeof (bre_special) - 1); p += sizeof (bre_special) - 1; if ((syntax & RE_LIMITED_OPS) == 0 && (syntax & RE_BK_PLUS_QM) == 0) { *p++ = '+'; *p++ = '?'; } if ((syntax & RE_INTERVALS) != 0 && (syntax & RE_NO_BK_BRACES) != 0) { *p++ = '{'; *p++ = '}'; } if ((syntax & RE_NO_BK_PARENS) != 0) { *p++ = '('; *p++ = ')'; } if ((syntax & RE_LIMITED_OPS) == 0 && (syntax & RE_NO_BK_VBAR) != 0) *p++ = '|'; if ((syntax & RE_NEWLINE_ALT) != 0) *p++ = '\n'; *p = '\0'; result.multibyte = true; result.anchored = anchored; return result; } /* Characters that are special in a PCRE. */ static const char pcre_special[] = "$^.*[]\\+?{}()|"; /* Options bit values, defined in . We don't include it here, because it is not a standard header. */ #define PCRE_ANCHORED 0x00000010 #define PCRE_EXTENDED 0x00000008 struct regex_quote_spec regex_quote_spec_pcre (int options, bool anchored) { struct regex_quote_spec result; char *p; p = result.special; memcpy (p, pcre_special, sizeof (pcre_special) - 1); p += sizeof (pcre_special) - 1; if (options & PCRE_EXTENDED) { *p++ = ' '; *p++ = '\t'; *p++ = '\n'; *p++ = '\v'; *p++ = '\f'; *p++ = '\r'; *p++ = '#'; } *p = '\0'; /* PCRE regular expressions consist of UTF-8 characters of options contains PCRE_UTF8 and of single bytes otherwise. */ result.multibyte = false; /* If options contains PCRE_ANCHORED, the anchoring is implicit. */ result.anchored = (options & PCRE_ANCHORED ? 0 : anchored); return result; } size_t regex_quote_length (const char *string, const struct regex_quote_spec *spec) { const char *special = spec->special; size_t length; length = 0; if (spec->anchored) length += 2; /* for '^' at the beginning and '$' at the end */ if (spec->multibyte) { mbui_iterator_t iter; for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter)) { /* We know that special contains only ASCII characters. */ if (mb_len (mbui_cur (iter)) == 1 && strchr (special, * mbui_cur_ptr (iter))) length += 1; length += mb_len (mbui_cur (iter)); } } else { const char *iter; for (iter = string; *iter != '\0'; iter++) { if (strchr (special, *iter)) length += 1; length += 1; } } return length; } char * regex_quote_copy (char *p, const char *string, const struct regex_quote_spec *spec) { const char *special = spec->special; if (spec->anchored) *p++ = '^'; if (spec->multibyte) { mbui_iterator_t iter; for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter)) { /* We know that special contains only ASCII characters. */ if (mb_len (mbui_cur (iter)) == 1 && strchr (special, * mbui_cur_ptr (iter))) *p++ = '\\'; memcpy (p, mbui_cur_ptr (iter), mb_len (mbui_cur (iter))); p += mb_len (mbui_cur (iter)); } } else { const char *iter; for (iter = string; *iter != '\0'; iter++) { if (strchr (special, *iter)) *p++ = '\\'; *p++ = *iter++; } } if (spec->anchored) *p++ = '$'; return p; } char * regex_quote (const char *string, const struct regex_quote_spec *spec) { size_t length = regex_quote_length (string, spec); char *result = XNMALLOC (length + 1, char); char *p; p = result; p = regex_quote_copy (p, string, spec); *p = '\0'; return result; }