summaryrefslogtreecommitdiff
path: root/sed/regexp.c
diff options
context:
space:
mode:
Diffstat (limited to 'sed/regexp.c')
-rw-r--r--sed/regexp.c267
1 files changed, 267 insertions, 0 deletions
diff --git a/sed/regexp.c b/sed/regexp.c
new file mode 100644
index 0000000..bbeccb5
--- /dev/null
+++ b/sed/regexp.c
@@ -0,0 +1,267 @@
+/* GNU SED, a batch stream editor.
+ Copyright (C) 1999, 2002, 2003, 2004, 2005, 2006
+ Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+#include "sed.h"
+
+#include <ctype.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef gettext_noop
+# define N_(String) gettext_noop(String)
+#else
+# define N_(String) (String)
+#endif
+
+extern bool use_extended_syntax_p;
+
+static const char errors[] =
+ "no previous regular expression\0"
+ "cannot specify modifiers on empty regexp";
+
+#define NO_REGEX (errors)
+#define BAD_MODIF (NO_REGEX + sizeof(N_("no previous regular expression")))
+#define END_ERRORS (BAD_MODIF + sizeof(N_("cannot specify modifiers on empty regexp")))
+
+
+
+static void
+compile_regex_1 (new_regex, needed_sub)
+ struct regex *new_regex;
+ int needed_sub;
+{
+#ifdef REG_PERL
+ int errcode;
+ errcode = regncomp(&new_regex->pattern, new_regex->re, new_regex->sz,
+ (needed_sub ? 0 : REG_NOSUB)
+ | new_regex->flags
+ | extended_regexp_flags);
+
+ if (errcode)
+ {
+ char errorbuf[200];
+ regerror(errcode, NULL, errorbuf, 200);
+ bad_prog(gettext(errorbuf));
+ }
+#else
+ const char *error;
+ int syntax = ((extended_regexp_flags & REG_EXTENDED)
+ ? RE_SYNTAX_POSIX_EXTENDED
+ : RE_SYNTAX_POSIX_BASIC);
+
+ syntax &= ~RE_DOT_NOT_NULL;
+ syntax |= RE_NO_POSIX_BACKTRACKING;
+
+ switch (posixicity)
+ {
+ case POSIXLY_EXTENDED:
+ syntax &= ~RE_UNMATCHED_RIGHT_PAREN_ORD;
+ break;
+ case POSIXLY_CORRECT:
+ syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD;
+ break;
+ case POSIXLY_BASIC:
+ syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD | RE_LIMITED_OPS | RE_NO_GNU_OPS;
+ break;
+ }
+
+#ifdef RE_ICASE
+ syntax |= (new_regex->flags & REG_ICASE) ? RE_ICASE : 0;
+#endif
+#ifdef RE_NO_SUB
+ syntax |= needed_sub ? 0 : RE_NO_SUB;
+#endif
+
+ new_regex->pattern.fastmap = malloc (1 << (sizeof (char) * 8));
+
+ /* If REG_NEWLINE is set, newlines are treated differently. */
+ if (new_regex->flags & REG_NEWLINE)
+ {
+ /* REG_NEWLINE implies neither . nor [^...] match newline. */
+ syntax &= ~RE_DOT_NEWLINE;
+ syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+ }
+
+ re_set_syntax (syntax);
+ error = re_compile_pattern (new_regex->re, new_regex->sz,
+ &new_regex->pattern);
+ new_regex->pattern.newline_anchor = (new_regex->flags & REG_NEWLINE) != 0;
+
+ new_regex->pattern.translate = NULL;
+#ifndef RE_ICASE
+ if (new_regex->flags & REG_ICASE)
+ {
+ static char translate[1 << (sizeof(char) * 8)];
+ int i;
+ for (i = 0; i < sizeof(translate) / sizeof(char); i++)
+ translate[i] = tolower (i);
+
+ new_regex->pattern.translate = translate;
+ }
+#endif
+
+ if (error)
+ bad_prog(error);
+#endif
+
+ /* Just to be sure, I mark this as not POSIXLY_CORRECT behavior */
+ if (needed_sub
+ && new_regex->pattern.re_nsub < needed_sub - 1
+ && posixicity == POSIXLY_EXTENDED)
+ {
+ char buf[200];
+ sprintf(buf, _("invalid reference \\%d on `s' command's RHS"),
+ needed_sub - 1);
+ bad_prog(buf);
+ }
+}
+
+struct regex *
+compile_regex(b, flags, needed_sub)
+ struct buffer *b;
+ int flags;
+ int needed_sub;
+{
+ struct regex *new_regex;
+ size_t re_len;
+
+ /* // matches the last RE */
+ if (size_buffer(b) == 0)
+ {
+ if (flags > 0)
+ bad_prog(_(BAD_MODIF));
+ return NULL;
+ }
+
+ re_len = size_buffer(b);
+ new_regex = ck_malloc(sizeof (struct regex) + re_len - 1);
+ new_regex->flags = flags;
+ memcpy (new_regex->re, get_buffer(b), re_len);
+
+#ifdef REG_PERL
+ new_regex->sz = re_len;
+#else
+ /* GNU regex does not process \t & co. */
+ new_regex->sz = normalize_text(new_regex->re, re_len, TEXT_REGEX);
+#endif
+
+ compile_regex_1 (new_regex, needed_sub);
+ return new_regex;
+}
+
+#ifdef REG_PERL
+static void
+copy_regs (regs, pmatch, nregs)
+ struct re_registers *regs;
+ regmatch_t *pmatch;
+ int nregs;
+{
+ int i;
+ int need_regs = nregs + 1;
+ /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
+ uses. */
+
+ /* Have the register data arrays been allocated? */
+ if (!regs->start)
+ { /* No. So allocate them with malloc. */
+ regs->start = MALLOC (need_regs, regoff_t);
+ regs->end = MALLOC (need_regs, regoff_t);
+ regs->num_regs = need_regs;
+ }
+ else if (need_regs > regs->num_regs)
+ { /* Yes. We also need more elements than were already
+ allocated, so reallocate them. */
+ regs->start = REALLOC (regs->start, need_regs, regoff_t);
+ regs->end = REALLOC (regs->end, need_regs, regoff_t);
+ regs->num_regs = need_regs;
+ }
+
+ /* Copy the regs. */
+ for (i = 0; i < nregs; ++i)
+ {
+ regs->start[i] = pmatch[i].rm_so;
+ regs->end[i] = pmatch[i].rm_eo;
+ }
+ for ( ; i < regs->num_regs; ++i)
+ regs->start[i] = regs->end[i] = -1;
+}
+#endif
+
+int
+match_regex(regex, buf, buflen, buf_start_offset, regarray, regsize)
+ struct regex *regex;
+ char *buf;
+ size_t buflen;
+ size_t buf_start_offset;
+ struct re_registers *regarray;
+ int regsize;
+{
+ int ret;
+ static struct regex *regex_last;
+#ifdef REG_PERL
+ regmatch_t rm[10], *regmatch = rm;
+ if (regsize > 10)
+ regmatch = (regmatch_t *) alloca (sizeof (regmatch_t) * regsize);
+#endif
+
+ /* printf ("Matching from %d/%d\n", buf_start_offset, buflen); */
+
+ /* Keep track of the last regexp matched. */
+ if (!regex)
+ {
+ regex = regex_last;
+ if (!regex_last)
+ bad_prog(_(NO_REGEX));
+ }
+ else
+ regex_last = regex;
+
+#ifdef REG_PERL
+ regmatch[0].rm_so = (int)buf_start_offset;
+ regmatch[0].rm_eo = (int)buflen;
+ ret = regexec (&regex->pattern, buf, regsize, regmatch, REG_STARTEND);
+
+ if (regsize)
+ copy_regs (regarray, regmatch, regsize);
+
+ return (ret == 0);
+#else
+ if (regex->pattern.no_sub && regsize)
+ compile_regex_1 (regex, regsize);
+
+ regex->pattern.regs_allocated = REGS_REALLOCATE;
+
+ ret = re_search (&regex->pattern, buf, buflen, buf_start_offset,
+ buflen - buf_start_offset,
+ regsize ? regarray : NULL);
+
+ return (ret > -1);
+#endif
+}
+
+
+#ifdef DEBUG_LEAKS
+void
+release_regex(regex)
+ struct regex *regex;
+{
+ regfree(&regex->pattern);
+ free(regex);
+}
+#endif /*DEBUG_LEAKS*/