summaryrefslogtreecommitdiff
path: root/sed/compile.c
diff options
context:
space:
mode:
Diffstat (limited to 'sed/compile.c')
-rw-r--r--sed/compile.c1734
1 files changed, 1734 insertions, 0 deletions
diff --git a/sed/compile.c b/sed/compile.c
new file mode 100644
index 0000000..513fac5
--- /dev/null
+++ b/sed/compile.c
@@ -0,0 +1,1734 @@
+/* GNU SED, a batch stream editor.
+ Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006,2008,2010
+ Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* compile.c: translate sed source into internal form */
+
+#include "sed.h"
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <obstack.h>
+
+
+#define YMAP_LENGTH 256 /*XXX shouldn't this be (UCHAR_MAX+1)?*/
+#define VECTOR_ALLOC_INCREMENT 40
+
+/* let's not confuse text editors that have only dumb bracket-matching... */
+#define OPEN_BRACKET '['
+#define CLOSE_BRACKET ']'
+#define OPEN_BRACE '{'
+#define CLOSE_BRACE '}'
+
+struct prog_info {
+ /* When we're reading a script command from a string, `prog.base'
+ points to the first character in the string, 'prog.cur' points
+ to the current character in the string, and 'prog.end' points
+ to the end of the string. This allows us to compile script
+ strings that contain nulls. */
+ const unsigned char *base;
+ const unsigned char *cur;
+ const unsigned char *end;
+
+ /* This is the current script file. If it is NULL, we are reading
+ from a string stored at `prog.cur' instead. If both `prog.file'
+ and `prog.cur' are NULL, we're in trouble! */
+ FILE *file;
+};
+
+/* Information used to give out useful and informative error messages. */
+struct error_info {
+ /* This is the name of the current script file. */
+ const char *name;
+
+ /* This is the number of the current script line that we're compiling. */
+ countT line;
+
+ /* This is the index of the "-e" expressions on the command line. */
+ countT string_expr_count;
+};
+
+
+/* Label structure used to resolve GOTO's, labels, and block beginnings. */
+struct sed_label {
+ countT v_index; /* index of vector element being referenced */
+ char *name; /* NUL-terminated name of the label */
+ struct error_info err_info; /* track where `{}' blocks start */
+ struct sed_label *next; /* linked list (stack) */
+};
+
+struct special_files {
+ struct output outf;
+ FILE **pfp;
+};
+
+FILE *my_stdin, *my_stdout, *my_stderr;
+struct special_files special_files[] = {
+ { { "/dev/stdin", false, NULL, NULL }, &my_stdin },
+ { { "/dev/stdout", false, NULL, NULL }, &my_stdout },
+ { { "/dev/stderr", false, NULL, NULL }, &my_stderr },
+ { { NULL, false, NULL, NULL }, NULL }
+};
+
+
+/* Where we are in the processing of the input. */
+static struct prog_info prog;
+static struct error_info cur_input;
+
+/* Information about labels and jumps-to-labels. This is used to do
+ the required backpatching after we have compiled all the scripts. */
+static struct sed_label *jumps = NULL;
+static struct sed_label *labels = NULL;
+
+/* We wish to detect #n magic only in the first input argument;
+ this flag tracks when we have consumed the first file of input. */
+static bool first_script = true;
+
+/* Allow for scripts like "sed -e 'i\' -e foo": */
+static struct buffer *pending_text = NULL;
+static struct text_buf *old_text_buf = NULL;
+
+/* Information about block start positions. This is used to backpatch
+ block end positions. */
+static struct sed_label *blocks = NULL;
+
+/* Use an obstack for compilation. */
+static struct obstack obs;
+
+/* Various error messages we may want to print */
+static const char errors[] =
+ "multiple `!'s\0"
+ "unexpected `,'\0"
+ "invalid usage of +N or ~N as first address\0"
+ "unmatched `{'\0"
+ "unexpected `}'\0"
+ "extra characters after command\0"
+ "expected \\ after `a', `c' or `i'\0"
+ "`}' doesn't want any addresses\0"
+ ": doesn't want any addresses\0"
+ "comments don't accept any addresses\0"
+ "missing command\0"
+ "command only uses one address\0"
+ "unterminated address regex\0"
+ "unterminated `s' command\0"
+ "unterminated `y' command\0"
+ "unknown option to `s'\0"
+ "multiple `p' options to `s' command\0"
+ "multiple `g' options to `s' command\0"
+ "multiple number options to `s' command\0"
+ "number option to `s' command may not be zero\0"
+ "strings for `y' command are different lengths\0"
+ "delimiter character is not a single-byte character\0"
+ "expected newer version of sed\0"
+ "invalid usage of line address 0\0"
+ "unknown command: `%c'\0"
+ "incomplete command";
+
+#define BAD_BANG (errors)
+#define BAD_COMMA (BAD_BANG + sizeof(N_("multiple `!'s")))
+#define BAD_STEP (BAD_COMMA + sizeof(N_("unexpected `,'")))
+#define EXCESS_OPEN_BRACE (BAD_STEP + sizeof(N_("invalid usage of +N or ~N as first address")))
+#define EXCESS_CLOSE_BRACE (EXCESS_OPEN_BRACE + sizeof(N_("unmatched `{'")))
+#define EXCESS_JUNK (EXCESS_CLOSE_BRACE + sizeof(N_("unexpected `}'")))
+#define EXPECTED_SLASH (EXCESS_JUNK + sizeof(N_("extra characters after command")))
+#define NO_CLOSE_BRACE_ADDR (EXPECTED_SLASH + sizeof(N_("expected \\ after `a', `c' or `i'")))
+#define NO_COLON_ADDR (NO_CLOSE_BRACE_ADDR + sizeof(N_("`}' doesn't want any addresses")))
+#define NO_SHARP_ADDR (NO_COLON_ADDR + sizeof(N_(": doesn't want any addresses")))
+#define NO_COMMAND (NO_SHARP_ADDR + sizeof(N_("comments don't accept any addresses")))
+#define ONE_ADDR (NO_COMMAND + sizeof(N_("missing command")))
+#define UNTERM_ADDR_RE (ONE_ADDR + sizeof(N_("command only uses one address")))
+#define UNTERM_S_CMD (UNTERM_ADDR_RE + sizeof(N_("unterminated address regex")))
+#define UNTERM_Y_CMD (UNTERM_S_CMD + sizeof(N_("unterminated `s' command")))
+#define UNKNOWN_S_OPT (UNTERM_Y_CMD + sizeof(N_("unterminated `y' command")))
+#define EXCESS_P_OPT (UNKNOWN_S_OPT + sizeof(N_("unknown option to `s'")))
+#define EXCESS_G_OPT (EXCESS_P_OPT + sizeof(N_("multiple `p' options to `s' command")))
+#define EXCESS_N_OPT (EXCESS_G_OPT + sizeof(N_("multiple `g' options to `s' command")))
+#define ZERO_N_OPT (EXCESS_N_OPT + sizeof(N_("multiple number options to `s' command")))
+#define Y_CMD_LEN (ZERO_N_OPT + sizeof(N_("number option to `s' command may not be zero")))
+#define BAD_DELIM (Y_CMD_LEN + sizeof(N_("strings for `y' command are different lengths")))
+#define ANCIENT_VERSION (BAD_DELIM + sizeof(N_("delimiter character is not a single-byte character")))
+#define INVALID_LINE_0 (ANCIENT_VERSION + sizeof(N_("expected newer version of sed")))
+#define UNKNOWN_CMD (INVALID_LINE_0 + sizeof(N_("invalid usage of line address 0")))
+#define INCOMPLETE_CMD (UNKNOWN_CMD + sizeof(N_("unknown command: `%c'")))
+#define END_ERRORS (INCOMPLETE_CMD + sizeof(N_("incomplete command")))
+
+static struct output *file_read = NULL;
+static struct output *file_write = NULL;
+
+
+/* Complain about an unknown command and exit. */
+void
+bad_command(ch)
+ char ch;
+{
+ const char *msg = _(UNKNOWN_CMD);
+ char *unknown_cmd = xmalloc(strlen(msg));
+ sprintf(unknown_cmd, msg, ch);
+ bad_prog(unknown_cmd);
+}
+
+/* Complain about a programming error and exit. */
+void
+bad_prog(why)
+ const char *why;
+{
+ if (cur_input.name)
+ fprintf(stderr, _("%s: file %s line %lu: %s\n"),
+ myname, cur_input.name, (unsigned long)cur_input.line, why);
+ else
+ fprintf(stderr, _("%s: -e expression #%lu, char %lu: %s\n"),
+ myname,
+ (unsigned long)cur_input.string_expr_count,
+ (unsigned long)(prog.cur-prog.base),
+ why);
+ exit(EXIT_FAILURE);
+}
+
+
+/* Read the next character from the program. Return EOF if there isn't
+ anything to read. Keep cur_input.line up to date, so error messages
+ can be meaningful. */
+static int inchar (void);
+static int
+inchar()
+{
+ int ch = EOF;
+
+ if (prog.cur)
+ {
+ if (prog.cur < prog.end)
+ ch = *prog.cur++;
+ }
+ else if (prog.file)
+ {
+ if (!feof(prog.file))
+ ch = getc(prog.file);
+ }
+ if (ch == '\n')
+ ++cur_input.line;
+ return ch;
+}
+
+/* unget `ch' so the next call to inchar will return it. */
+static void savchar (int ch);
+static void
+savchar(ch)
+ int ch;
+{
+ if (ch == EOF)
+ return;
+ if (ch == '\n' && cur_input.line > 0)
+ --cur_input.line;
+ if (prog.cur)
+ {
+ if (prog.cur <= prog.base || *--prog.cur != ch)
+ panic("Called savchar() with unexpected pushback (%x)",
+ (unsigned char)ch);
+ }
+ else
+ ungetc(ch, prog.file);
+}
+
+/* Read the next non-blank character from the program. */
+static int in_nonblank (void);
+static int
+in_nonblank()
+{
+ int ch;
+ do
+ ch = inchar();
+ while (ISBLANK(ch));
+ return ch;
+}
+
+/* Read an integer value from the program. */
+static countT in_integer (int ch);
+static countT
+in_integer(ch)
+ int ch;
+{
+ countT num = 0;
+
+ while (ISDIGIT(ch))
+ {
+ num = num * 10 + ch - '0';
+ ch = inchar();
+ }
+ savchar(ch);
+ return num;
+}
+
+static int add_then_next (struct buffer *b, int ch);
+static int
+add_then_next(b, ch)
+ struct buffer *b;
+ int ch;
+{
+ add1_buffer(b, ch);
+ return inchar();
+}
+
+static char * convert_number (char *, char *, const char *, int);
+static char *
+convert_number(result, buf, bufend, base)
+ char *result;
+ char *buf;
+ const char *bufend;
+ int base;
+{
+ int n = 0;
+ int max = 1;
+ char *p;
+
+ for (p=buf+1; p < bufend && max <= 255; ++p, max *= base)
+ {
+ int d = -1;
+ switch (*p)
+ {
+ case '0': d = 0x0; break;
+ case '1': d = 0x1; break;
+ case '2': d = 0x2; break;
+ case '3': d = 0x3; break;
+ case '4': d = 0x4; break;
+ case '5': d = 0x5; break;
+ case '6': d = 0x6; break;
+ case '7': d = 0x7; break;
+ case '8': d = 0x8; break;
+ case '9': d = 0x9; break;
+ case 'A': case 'a': d = 0xa; break;
+ case 'B': case 'b': d = 0xb; break;
+ case 'C': case 'c': d = 0xc; break;
+ case 'D': case 'd': d = 0xd; break;
+ case 'E': case 'e': d = 0xe; break;
+ case 'F': case 'f': d = 0xf; break;
+ }
+ if (d < 0 || base <= d)
+ break;
+ n = n * base + d;
+ }
+ if (p == buf+1)
+ *result = *buf;
+ else
+ *result = n;
+ return p;
+}
+
+
+/* Read in a filename for a `r', `w', or `s///w' command. */
+static struct buffer *read_filename (void);
+static struct buffer *
+read_filename()
+{
+ struct buffer *b;
+ int ch;
+
+ b = init_buffer();
+ ch = in_nonblank();
+ while (ch != EOF && ch != '\n')
+ {
+#if 0 /*XXX ZZZ 1998-09-12 kpp: added, then had second thoughts*/
+ if (posixicity == POSIXLY_EXTENDED)
+ if (ch == ';' || ch == '#')
+ {
+ savchar(ch);
+ break;
+ }
+#endif
+ ch = add_then_next(b, ch);
+ }
+ add1_buffer(b, '\0');
+ return b;
+}
+
+static struct output *get_openfile (struct output **file_ptrs, const char *mode, int fail);
+static struct output *
+get_openfile(file_ptrs, mode, fail)
+ struct output **file_ptrs;
+ const char *mode;
+ int fail;
+{
+ struct buffer *b;
+ char *file_name;
+ struct output *p;
+
+ b = read_filename();
+ file_name = get_buffer(b);
+ for (p=*file_ptrs; p; p=p->link)
+ if (strcmp(p->name, file_name) == 0)
+ break;
+
+ if (posixicity == POSIXLY_EXTENDED)
+ {
+ /* Check whether it is a special file (stdin, stdout or stderr) */
+ struct special_files *special = special_files;
+
+ /* std* sometimes are not constants, so they
+ cannot be used in the initializer for special_files */
+ my_stdin = stdin; my_stdout = stdout; my_stderr = stderr;
+ for (special = special_files; special->outf.name; special++)
+ if (strcmp(special->outf.name, file_name) == 0)
+ {
+ special->outf.fp = *special->pfp;
+ free_buffer (b);
+ return &special->outf;
+ }
+ }
+
+ if (!p)
+ {
+ p = OB_MALLOC(&obs, 1, struct output);
+ p->name = ck_strdup(file_name);
+ p->fp = ck_fopen(p->name, mode, fail);
+ p->missing_newline = false;
+ p->link = *file_ptrs;
+ *file_ptrs = p;
+ }
+ free_buffer(b);
+ return p;
+}
+
+
+static struct sed_cmd *next_cmd_entry (struct vector **vectorp);
+static struct sed_cmd *
+next_cmd_entry(vectorp)
+ struct vector **vectorp;
+{
+ struct sed_cmd *cmd;
+ struct vector *v;
+
+ v = *vectorp;
+ if (v->v_length == v->v_allocated)
+ {
+ v->v_allocated += VECTOR_ALLOC_INCREMENT;
+ v->v = REALLOC(v->v, v->v_allocated, struct sed_cmd);
+ }
+
+ cmd = v->v + v->v_length;
+ cmd->a1 = NULL;
+ cmd->a2 = NULL;
+ cmd->range_state = RANGE_INACTIVE;
+ cmd->addr_bang = false;
+ cmd->cmd = '\0'; /* something invalid, to catch bugs early */
+
+ *vectorp = v;
+ return cmd;
+}
+
+static int snarf_char_class (struct buffer *b, mbstate_t *cur_stat);
+static int
+snarf_char_class(b, cur_stat)
+ struct buffer *b;
+ mbstate_t *cur_stat;
+{
+ int ch;
+ int state = 0;
+ int delim;
+ bool pending_mb = 0;
+
+ ch = inchar();
+ if (ch == '^')
+ ch = add_then_next(b, ch);
+ if (ch == CLOSE_BRACKET)
+ ch = add_then_next(b, ch);
+
+ /* States are:
+ 0 outside a collation element, character class or collation class
+ 1 after the bracket
+ 2 after the opening ./:/=
+ 3 after the closing ./:/= */
+
+ for (;; ch = add_then_next (b, ch))
+ {
+ pending_mb = BRLEN (ch, cur_stat) != 1;
+
+ switch (ch)
+ {
+ case EOF:
+ case '\n':
+ return ch;
+
+ case '.':
+ case ':':
+ case '=':
+ if (pending_mb)
+ continue;
+
+ if (state == 1)
+ {
+ delim = ch;
+ state = 2;
+ }
+ else if (state == 2 && ch == delim)
+ state = 3;
+ else
+ break;
+
+ continue;
+
+ case OPEN_BRACKET:
+ if (pending_mb)
+ continue;
+
+ if (state == 0)
+ state = 1;
+ continue;
+
+ case CLOSE_BRACKET:
+ if (pending_mb)
+ continue;
+
+ if (state == 0 || state == 1)
+ return ch;
+ else if (state == 3)
+ state = 0;
+
+ break;
+
+ default:
+ break;
+ }
+
+ /* Getting a character different from .=: whilst in state 1
+ goes back to state 0, getting a character different from ]
+ whilst in state 3 goes back to state 2. */
+ state &= ~1;
+ }
+}
+
+static struct buffer *match_slash (int slash, int regex);
+static struct buffer *
+match_slash(slash, regex)
+ int slash;
+ int regex;
+{
+ struct buffer *b;
+ int ch;
+ mbstate_t cur_stat;
+
+ memset (&cur_stat, 0, sizeof (mbstate_t));
+
+ /* We allow only 1 byte characters for a slash. */
+ if (BRLEN (slash, &cur_stat) == -2)
+ bad_prog (BAD_DELIM);
+
+ memset (&cur_stat, 0, sizeof (mbstate_t));
+
+ b = init_buffer();
+ while ((ch = inchar()) != EOF && ch != '\n')
+ {
+ bool pending_mb = !MBSINIT (&cur_stat);
+ if (BRLEN (ch, &cur_stat) == 1 && !pending_mb)
+ {
+ if (ch == slash)
+ return b;
+ else if (ch == '\\')
+ {
+ ch = inchar();
+ if (ch == EOF)
+ break;
+#ifndef REG_PERL
+ else if (ch == 'n' && regex)
+ ch = '\n';
+#endif
+ else if (ch != '\n' && (ch != slash || (!regex && ch == '&')))
+ add1_buffer(b, '\\');
+ }
+ else if (ch == OPEN_BRACKET && regex)
+ {
+ add1_buffer(b, ch);
+ ch = snarf_char_class(b, &cur_stat);
+ if (ch != CLOSE_BRACKET)
+ break;
+ }
+ }
+
+ add1_buffer(b, ch);
+ }
+
+ if (ch == '\n')
+ savchar(ch); /* for proper line number in error report */
+ free_buffer(b);
+ return NULL;
+}
+
+static int mark_subst_opts (struct subst *cmd);
+static int
+mark_subst_opts(cmd)
+ struct subst *cmd;
+{
+ int flags = 0;
+ int ch;
+
+ cmd->global = false;
+ cmd->print = false;
+ cmd->eval = false;
+ cmd->numb = 0;
+ cmd->outf = NULL;
+
+ for (;;)
+ switch ( (ch = in_nonblank()) )
+ {
+ case 'i': /* GNU extension */
+ case 'I': /* GNU extension */
+ if (posixicity == POSIXLY_BASIC)
+ bad_prog(_(UNKNOWN_S_OPT));
+ flags |= REG_ICASE;
+ break;
+
+#ifdef REG_PERL
+ case 's': /* GNU extension */
+ case 'S': /* GNU extension */
+ if (posixicity == POSIXLY_BASIC)
+ bad_prog(_(UNKNOWN_S_OPT));
+ if (extended_regexp_flags & REG_PERL)
+ flags |= REG_DOTALL;
+ break;
+
+ case 'x': /* GNU extension */
+ case 'X': /* GNU extension */
+ if (posixicity == POSIXLY_BASIC)
+ bad_prog(_(UNKNOWN_S_OPT));
+ if (extended_regexp_flags & REG_PERL)
+ flags |= REG_EXTENDED;
+ break;
+#endif
+
+ case 'm': /* GNU extension */
+ case 'M': /* GNU extension */
+ if (posixicity == POSIXLY_BASIC)
+ bad_prog(_(UNKNOWN_S_OPT));
+ flags |= REG_NEWLINE;
+ break;
+
+ case 'e':
+ cmd->eval = true;
+ break;
+
+ case 'p':
+ if (cmd->print)
+ bad_prog(_(EXCESS_P_OPT));
+ cmd->print |= (1 << cmd->eval); /* 1=before eval, 2=after */
+ break;
+
+ case 'g':
+ if (cmd->global)
+ bad_prog(_(EXCESS_G_OPT));
+ cmd->global = true;
+ break;
+
+ case 'w':
+ cmd->outf = get_openfile(&file_write, write_mode, true);
+ return flags;
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ if (cmd->numb)
+ bad_prog(_(EXCESS_N_OPT));
+ cmd->numb = in_integer(ch);
+ if (!cmd->numb)
+ bad_prog(_(ZERO_N_OPT));
+ break;
+
+ case CLOSE_BRACE:
+ case '#':
+ savchar(ch);
+ /* Fall Through */
+ case EOF:
+ case '\n':
+ case ';':
+ return flags;
+
+ case '\r':
+ if (inchar() == '\n')
+ return flags;
+ /* FALLTHROUGH */
+
+ default:
+ bad_prog(_(UNKNOWN_S_OPT));
+ /*NOTREACHED*/
+ }
+}
+
+
+/* read in a label for a `:', `b', or `t' command */
+static char *read_label (void);
+static char *
+read_label()
+{
+ struct buffer *b;
+ int ch;
+ char *ret;
+
+ b = init_buffer();
+ ch = in_nonblank();
+
+ while (ch != EOF && ch != '\n'
+ && !ISBLANK(ch) && ch != ';' && ch != CLOSE_BRACE && ch != '#')
+ ch = add_then_next (b, ch);
+
+ savchar(ch);
+ add1_buffer(b, '\0');
+ ret = ck_strdup(get_buffer(b));
+ free_buffer(b);
+ return ret;
+}
+
+/* Store a label (or label reference) created by a `:', `b', or `t'
+ command so that the jump to/from the label can be backpatched after
+ compilation is complete, or a reference created by a `{' to be
+ backpatched when the corresponding `}' is found. */
+static struct sed_label *setup_label
+ (struct sed_label *, countT, char *, const struct error_info *);
+static struct sed_label *
+setup_label(list, idx, name, err_info)
+ struct sed_label *list;
+ countT idx;
+ char *name;
+ const struct error_info *err_info;
+{
+ struct sed_label *ret = OB_MALLOC(&obs, 1, struct sed_label);
+ ret->v_index = idx;
+ ret->name = name;
+ if (err_info)
+ memcpy(&ret->err_info, err_info, sizeof (ret->err_info));
+ ret->next = list;
+ return ret;
+}
+
+static struct sed_label *release_label (struct sed_label *list_head);
+static struct sed_label *
+release_label(list_head)
+ struct sed_label *list_head;
+{
+ struct sed_label *ret;
+
+ if (!list_head)
+ return NULL;
+ ret = list_head->next;
+
+ free(list_head->name);
+
+#if 0
+ /* We use obstacks */
+ free(list_head);
+#endif
+ return ret;
+}
+
+static struct replacement *
+new_replacement(char *text, size_t length, enum replacement_types type)
+{
+ struct replacement *r = OB_MALLOC(&obs, 1, struct replacement);
+
+ r->prefix = text;
+ r->prefix_length = length;
+ r->subst_id = -1;
+ r->repl_type = type;
+
+ /* r-> next = NULL; */
+ return r;
+}
+
+static void setup_replacement (struct subst *, const char *, size_t);
+static void
+setup_replacement(sub, text, length)
+ struct subst *sub;
+ const char *text;
+ size_t length;
+{
+ char *base;
+ char *p;
+ char *text_end;
+ enum replacement_types repl_type = REPL_ASIS, save_type = REPL_ASIS;
+ struct replacement root;
+ struct replacement *tail;
+
+ sub->max_id = 0;
+ base = MEMDUP(text, length, char);
+ length = normalize_text(base, length, TEXT_REPLACEMENT);
+
+ text_end = base + length;
+ tail = &root;
+
+ for (p=base; p<text_end; ++p)
+ {
+ if (*p == '\\')
+ {
+ /* Preceding the backslash may be some literal text: */
+ tail = tail->next =
+ new_replacement(base, (size_t)(p - base), repl_type);
+
+ repl_type = save_type;
+
+ /* Skip the backslash and look for a numeric back-reference,
+ or a case-munging escape if not in POSIX mode: */
+ ++p;
+ if (p == text_end)
+ ++tail->prefix_length;
+
+ else if (posixicity == POSIXLY_BASIC && !ISDIGIT (*p))
+ {
+ p[-1] = *p;
+ ++tail->prefix_length;
+ }
+
+ else
+ switch (*p)
+ {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ tail->subst_id = *p - '0';
+ if (sub->max_id < tail->subst_id)
+ sub->max_id = tail->subst_id;
+ break;
+
+ case 'L':
+ repl_type = REPL_LOWERCASE;
+ save_type = REPL_LOWERCASE;
+ break;
+
+ case 'U':
+ repl_type = REPL_UPPERCASE;
+ save_type = REPL_UPPERCASE;
+ break;
+
+ case 'E':
+ repl_type = REPL_ASIS;
+ save_type = REPL_ASIS;
+ break;
+
+ case 'l':
+ save_type = repl_type;
+ repl_type |= REPL_LOWERCASE_FIRST;
+ break;
+
+ case 'u':
+ save_type = repl_type;
+ repl_type |= REPL_UPPERCASE_FIRST;
+ break;
+
+ default:
+ p[-1] = *p;
+ ++tail->prefix_length;
+ }
+
+ base = p + 1;
+ }
+ else if (*p == '&')
+ {
+ /* Preceding the ampersand may be some literal text: */
+ tail = tail->next =
+ new_replacement(base, (size_t)(p - base), repl_type);
+
+ repl_type = save_type;
+ tail->subst_id = 0;
+ base = p + 1;
+ }
+ }
+ /* There may be some trailing literal text: */
+ if (base < text_end)
+ tail = tail->next =
+ new_replacement(base, (size_t)(text_end - base), repl_type);
+
+ tail->next = NULL;
+ sub->replacement = root.next;
+}
+
+static void read_text (struct text_buf *buf, int leadin_ch);
+static void
+read_text(buf, leadin_ch)
+ struct text_buf *buf;
+ int leadin_ch;
+{
+ int ch;
+
+ /* Should we start afresh (as opposed to continue a partial text)? */
+ if (buf)
+ {
+ if (pending_text)
+ free_buffer(pending_text);
+ pending_text = init_buffer();
+ buf->text = NULL;
+ buf->text_length = 0;
+ old_text_buf = buf;
+ }
+ /* assert(old_text_buf != NULL); */
+
+ if (leadin_ch == EOF)
+ return;
+
+ if (leadin_ch != '\n')
+ add1_buffer(pending_text, leadin_ch);
+
+ ch = inchar();
+ while (ch != EOF && ch != '\n')
+ {
+ if (ch == '\\')
+ {
+ ch = inchar();
+ if (ch != EOF)
+ add1_buffer (pending_text, '\\');
+ }
+
+ if (ch == EOF)
+ {
+ add1_buffer (pending_text, '\n');
+ return;
+ }
+
+ ch = add_then_next (pending_text, ch);
+ }
+
+ add1_buffer(pending_text, '\n');
+ if (!buf)
+ buf = old_text_buf;
+ buf->text_length = normalize_text (get_buffer (pending_text),
+ size_buffer (pending_text), TEXT_BUFFER);
+ buf->text = MEMDUP(get_buffer(pending_text), buf->text_length, char);
+ free_buffer(pending_text);
+ pending_text = NULL;
+}
+
+
+/* Try to read an address for a sed command. If it succeeds,
+ return non-zero and store the resulting address in `*addr'.
+ If the input doesn't look like an address read nothing
+ and return zero. */
+static bool compile_address (struct addr *addr, int ch);
+static bool
+compile_address(addr, ch)
+ struct addr *addr;
+ int ch;
+{
+ addr->addr_type = ADDR_IS_NULL;
+ addr->addr_step = 0;
+ addr->addr_number = ~(countT)0; /* extremely unlikely to ever match */
+ addr->addr_regex = NULL;
+
+ if (ch == '/' || ch == '\\')
+ {
+ int flags = 0;
+ struct buffer *b;
+ addr->addr_type = ADDR_IS_REGEX;
+ if (ch == '\\')
+ ch = inchar();
+ if ( !(b = match_slash(ch, true)) )
+ bad_prog(_(UNTERM_ADDR_RE));
+
+ for(;;)
+ {
+ ch = in_nonblank();
+ if (posixicity == POSIXLY_BASIC)
+ goto posix_address_modifier;
+ switch(ch)
+ {
+ case 'I': /* GNU extension */
+ flags |= REG_ICASE;
+ break;
+
+#ifdef REG_PERL
+ case 'S': /* GNU extension */
+ if (extended_regexp_flags & REG_PERL)
+ flags |= REG_DOTALL;
+ break;
+
+ case 'X': /* GNU extension */
+ if (extended_regexp_flags & REG_PERL)
+ flags |= REG_EXTENDED;
+ break;
+#endif
+
+ case 'M': /* GNU extension */
+ flags |= REG_NEWLINE;
+ break;
+
+ default:
+ posix_address_modifier:
+ savchar (ch);
+ addr->addr_regex = compile_regex (b, flags, 0);
+ free_buffer(b);
+ return true;
+ }
+ }
+ }
+ else if (ISDIGIT(ch))
+ {
+ addr->addr_number = in_integer(ch);
+ addr->addr_type = ADDR_IS_NUM;
+ ch = in_nonblank();
+ if (ch != '~' || posixicity == POSIXLY_BASIC)
+ {
+ savchar(ch);
+ }
+ else
+ {
+ countT step = in_integer(in_nonblank());
+ if (step > 0)
+ {
+ addr->addr_step = step;
+ addr->addr_type = ADDR_IS_NUM_MOD;
+ }
+ }
+ }
+ else if ((ch == '+' || ch == '~') && posixicity != POSIXLY_BASIC)
+ {
+ addr->addr_step = in_integer(in_nonblank());
+ if (addr->addr_step==0)
+ ; /* default to ADDR_IS_NULL; forces matching to stop on next line */
+ else if (ch == '+')
+ addr->addr_type = ADDR_IS_STEP;
+ else
+ addr->addr_type = ADDR_IS_STEP_MOD;
+ }
+ else if (ch == '$')
+ {
+ addr->addr_type = ADDR_IS_LAST;
+ }
+ else
+ return false;
+
+ return true;
+}
+
+/* Read a program (or a subprogram within `{' `}' pairs) in and store
+ the compiled form in `*vector'. Return a pointer to the new vector. */
+static struct vector *compile_program (struct vector *);
+static struct vector *
+compile_program(vector)
+ struct vector *vector;
+{
+ struct sed_cmd *cur_cmd;
+ struct buffer *b;
+ int ch;
+
+ if (!vector)
+ {
+ vector = MALLOC(1, struct vector);
+ vector->v = NULL;
+ vector->v_allocated = 0;
+ vector->v_length = 0;
+
+ obstack_init (&obs);
+ }
+ if (pending_text)
+ read_text(NULL, '\n');
+
+ for (;;)
+ {
+ struct addr a;
+
+ while ((ch=inchar()) == ';' || ISSPACE(ch))
+ ;
+ if (ch == EOF)
+ break;
+
+ cur_cmd = next_cmd_entry(&vector);
+ if (compile_address(&a, ch))
+ {
+ if (a.addr_type == ADDR_IS_STEP
+ || a.addr_type == ADDR_IS_STEP_MOD)
+ bad_prog(_(BAD_STEP));
+
+ cur_cmd->a1 = MEMDUP(&a, 1, struct addr);
+ ch = in_nonblank();
+ if (ch == ',')
+ {
+ if (!compile_address(&a, in_nonblank()))
+ bad_prog(_(BAD_COMMA));
+
+ cur_cmd->a2 = MEMDUP(&a, 1, struct addr);
+ ch = in_nonblank();
+ }
+
+ if ((cur_cmd->a1->addr_type == ADDR_IS_NUM
+ && cur_cmd->a1->addr_number == 0)
+ && ((!cur_cmd->a2 || cur_cmd->a2->addr_type != ADDR_IS_REGEX)
+ || posixicity == POSIXLY_BASIC))
+ bad_prog(_(INVALID_LINE_0));
+ }
+ if (ch == '!')
+ {
+ cur_cmd->addr_bang = true;
+ ch = in_nonblank();
+ if (ch == '!')
+ bad_prog(_(BAD_BANG));
+ }
+
+ /* Do not accept extended commands in --posix mode. Also,
+ a few commands only accept one address in that mode. */
+ if (posixicity == POSIXLY_BASIC)
+ switch (ch)
+ {
+ case 'e': case 'F': case 'v': case 'z': case 'L':
+ case 'Q': case 'T': case 'R': case 'W':
+ bad_command(ch);
+
+ case 'a': case 'i': case 'l':
+ case '=': case 'r':
+ if (cur_cmd->a2)
+ bad_prog(_(ONE_ADDR));
+ }
+
+ cur_cmd->cmd = ch;
+ switch (ch)
+ {
+ case '#':
+ if (cur_cmd->a1)
+ bad_prog(_(NO_SHARP_ADDR));
+ ch = inchar();
+ if (ch=='n' && first_script && cur_input.line < 2)
+ if ( (prog.base && prog.cur==2+prog.base)
+ || (prog.file && !prog.base && 2==ftell(prog.file)))
+ no_default_output = true;
+ while (ch != EOF && ch != '\n')
+ ch = inchar();
+ continue; /* restart the for (;;) loop */
+
+ case 'v':
+ /* This is an extension. Programs needing GNU sed might start
+ * with a `v' command so that other seds will stop.
+ * We compare the version and ignore POSIXLY_CORRECT.
+ */
+ {
+ char *version = read_label ();
+ char *compared_version;
+ compared_version = (*version == '\0') ? "4.0" : version;
+ if (strverscmp (compared_version, SED_FEATURE_VERSION) > 0)
+ bad_prog(_(ANCIENT_VERSION));
+
+ free (version);
+ posixicity = POSIXLY_EXTENDED;
+ }
+ continue;
+
+ case '{':
+ blocks = setup_label(blocks, vector->v_length, NULL, &cur_input);
+ cur_cmd->addr_bang = !cur_cmd->addr_bang;
+ break;
+
+ case '}':
+ if (!blocks)
+ bad_prog(_(EXCESS_CLOSE_BRACE));
+ if (cur_cmd->a1)
+ bad_prog(_(NO_CLOSE_BRACE_ADDR));
+ ch = in_nonblank();
+ if (ch == CLOSE_BRACE || ch == '#')
+ savchar(ch);
+ else if (ch != EOF && ch != '\n' && ch != ';')
+ bad_prog(_(EXCESS_JUNK));
+
+ vector->v[blocks->v_index].x.jump_index = vector->v_length;
+ blocks = release_label(blocks); /* done with this entry */
+ break;
+
+ case 'e':
+ ch = in_nonblank();
+ if (ch == EOF || ch == '\n')
+ {
+ cur_cmd->x.cmd_txt.text_length = 0;
+ break;
+ }
+ else
+ goto read_text_to_slash;
+
+ case 'a':
+ case 'i':
+ case 'c':
+ ch = in_nonblank();
+
+ read_text_to_slash:
+ if (ch == EOF)
+ bad_prog(_(EXPECTED_SLASH));
+
+ if (ch == '\\')
+ ch = inchar();
+ else
+ {
+ if (posixicity == POSIXLY_BASIC)
+ bad_prog(_(EXPECTED_SLASH));
+ savchar(ch);
+ ch = '\n';
+ }
+
+ read_text(&cur_cmd->x.cmd_txt, ch);
+ break;
+
+ case ':':
+ if (cur_cmd->a1)
+ bad_prog(_(NO_COLON_ADDR));
+ labels = setup_label(labels, vector->v_length, read_label(), NULL);
+ break;
+
+ case 'T':
+ case 'b':
+ case 't':
+ jumps = setup_label(jumps, vector->v_length, read_label(), NULL);
+ break;
+
+ case 'Q':
+ case 'q':
+ if (cur_cmd->a2)
+ bad_prog(_(ONE_ADDR));
+ /* Fall through */
+
+ case 'L':
+ case 'l':
+ ch = in_nonblank();
+ if (ISDIGIT(ch) && posixicity != POSIXLY_BASIC)
+ {
+ cur_cmd->x.int_arg = in_integer(ch);
+ ch = in_nonblank();
+ }
+ else
+ cur_cmd->x.int_arg = -1;
+
+ if (ch == CLOSE_BRACE || ch == '#')
+ savchar(ch);
+ else if (ch != EOF && ch != '\n' && ch != ';')
+ bad_prog(_(EXCESS_JUNK));
+
+ break;
+
+ case '=':
+ case 'd':
+ case 'D':
+ case 'F':
+ case 'g':
+ case 'G':
+ case 'h':
+ case 'H':
+ case 'n':
+ case 'N':
+ case 'p':
+ case 'P':
+ case 'z':
+ case 'x':
+ ch = in_nonblank();
+ if (ch == CLOSE_BRACE || ch == '#')
+ savchar(ch);
+ else if (ch != EOF && ch != '\n' && ch != ';')
+ bad_prog(_(EXCESS_JUNK));
+ break;
+
+ case 'r':
+ b = read_filename();
+ cur_cmd->x.fname = ck_strdup(get_buffer(b));
+ free_buffer(b);
+ break;
+
+ case 'R':
+ cur_cmd->x.fp = get_openfile(&file_read, read_mode, false)->fp;
+ break;
+
+ case 'W':
+ case 'w':
+ cur_cmd->x.outf = get_openfile(&file_write, write_mode, true);
+ break;
+
+ case 's':
+ {
+ struct buffer *b2;
+ int flags;
+ int slash;
+
+ slash = inchar();
+ if ( !(b = match_slash(slash, true)) )
+ bad_prog(_(UNTERM_S_CMD));
+ if ( !(b2 = match_slash(slash, false)) )
+ bad_prog(_(UNTERM_S_CMD));
+
+ cur_cmd->x.cmd_subst = OB_MALLOC(&obs, 1, struct subst);
+ setup_replacement(cur_cmd->x.cmd_subst,
+ get_buffer(b2), size_buffer(b2));
+ free_buffer(b2);
+
+ flags = mark_subst_opts(cur_cmd->x.cmd_subst);
+ cur_cmd->x.cmd_subst->regx =
+ compile_regex(b, flags, cur_cmd->x.cmd_subst->max_id + 1);
+ free_buffer(b);
+ }
+ break;
+
+ case 'y':
+ {
+ size_t len, dest_len;
+ int slash;
+ struct buffer *b2;
+ char *src_buf, *dest_buf;
+
+ slash = inchar();
+ if ( !(b = match_slash(slash, false)) )
+ bad_prog(_(UNTERM_Y_CMD));
+ src_buf = get_buffer(b);
+ len = normalize_text(src_buf, size_buffer (b), TEXT_BUFFER);
+
+ if ( !(b2 = match_slash(slash, false)) )
+ bad_prog(_(UNTERM_Y_CMD));
+ dest_buf = get_buffer(b2);
+ dest_len = normalize_text(dest_buf, size_buffer (b2), TEXT_BUFFER);
+
+ if (mb_cur_max > 1)
+ {
+ int i, j, idx, src_char_num;
+ size_t *src_lens = MALLOC(len, size_t);
+ char **trans_pairs;
+ size_t mbclen;
+ mbstate_t cur_stat;
+
+ /* Enumerate how many character the source buffer has. */
+ memset(&cur_stat, 0, sizeof(mbstate_t));
+ for (i = 0, j = 0; i < len;)
+ {
+ mbclen = MBRLEN (src_buf + i, len - i, &cur_stat);
+ /* An invalid sequence, or a truncated multibyte character.
+ We treat it as a singlebyte character. */
+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2
+ || mbclen == 0)
+ mbclen = 1;
+ src_lens[j++] = mbclen;
+ i += mbclen;
+ }
+ src_char_num = j;
+
+ memset(&cur_stat, 0, sizeof(mbstate_t));
+ idx = 0;
+
+ /* trans_pairs = {src(0), dest(0), src(1), dest(1), ..., NULL}
+ src(i) : pointer to i-th source character.
+ dest(i) : pointer to i-th destination character.
+ NULL : terminator */
+ trans_pairs = MALLOC(2 * src_char_num + 1, char*);
+ cur_cmd->x.translatemb = trans_pairs;
+ for (i = 0; i < src_char_num; i++)
+ {
+ if (idx >= dest_len)
+ bad_prog(_(Y_CMD_LEN));
+
+ /* Set the i-th source character. */
+ trans_pairs[2 * i] = MALLOC(src_lens[i] + 1, char);
+ strncpy(trans_pairs[2 * i], src_buf, src_lens[i]);
+ trans_pairs[2 * i][src_lens[i]] = '\0';
+ src_buf += src_lens[i]; /* Forward to next character. */
+
+ /* Fetch the i-th destination character. */
+ mbclen = MBRLEN (dest_buf + idx, dest_len - idx, &cur_stat);
+ /* An invalid sequence, or a truncated multibyte character.
+ We treat it as a singlebyte character. */
+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2
+ || mbclen == 0)
+ mbclen = 1;
+
+ /* Set the i-th destination character. */
+ trans_pairs[2 * i + 1] = MALLOC(mbclen + 1, char);
+ strncpy(trans_pairs[2 * i + 1], dest_buf + idx, mbclen);
+ trans_pairs[2 * i + 1][mbclen] = '\0';
+ idx += mbclen; /* Forward to next character. */
+ }
+ trans_pairs[2 * i] = NULL;
+ if (idx != dest_len)
+ bad_prog(_(Y_CMD_LEN));
+ }
+ else
+ {
+ unsigned char *translate =
+ OB_MALLOC(&obs, YMAP_LENGTH, unsigned char);
+ unsigned char *ustring = (unsigned char *)src_buf;
+
+ if (len != dest_len)
+ bad_prog(_(Y_CMD_LEN));
+
+ for (len = 0; len < YMAP_LENGTH; len++)
+ translate[len] = len;
+
+ while (dest_len--)
+ translate[*ustring++] = (unsigned char)*dest_buf++;
+
+ cur_cmd->x.translate = translate;
+ }
+
+ if ((ch = in_nonblank()) != EOF && ch != '\n' && ch != ';')
+ bad_prog(_(EXCESS_JUNK));
+
+ free_buffer(b);
+ free_buffer(b2);
+ }
+ break;
+
+ case EOF:
+ bad_prog(_(NO_COMMAND));
+ /*NOTREACHED*/
+
+ default:
+ bad_command (ch);
+ /*NOTREACHED*/
+ }
+
+ /* this is buried down here so that "continue" statements will miss it */
+ ++vector->v_length;
+ }
+ if (posixicity == POSIXLY_BASIC && pending_text)
+ bad_prog (_(INCOMPLETE_CMD));
+ return vector;
+}
+
+
+/* deal with \X escapes */
+size_t
+normalize_text(buf, len, buftype)
+ char *buf;
+ size_t len;
+ enum text_types buftype;
+{
+ const char *bufend = buf + len;
+ char *p = buf;
+ char *q = buf;
+ char ch;
+ int base;
+
+ /* This variable prevents normalizing text within bracket
+ subexpressions when conforming to POSIX. If 0, we
+ are not within a bracket expression. If -1, we are within a
+ bracket expression but are not within [.FOO.], [=FOO=],
+ or [:FOO:]. Otherwise, this is the '.', '=', or ':'
+ respectively within these three types of subexpressions. */
+ int bracket_state = 0;
+
+ int mbclen;
+ mbstate_t cur_stat;
+ memset(&cur_stat, 0, sizeof(mbstate_t));
+
+ while (p < bufend)
+ {
+ mbclen = MBRLEN (p, bufend - p, &cur_stat);
+ if (mbclen != 1)
+ {
+ /* An invalid sequence, or a truncated multibyte character.
+ We treat it as a singlebyte character. */
+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+ mbclen = 1;
+
+ memmove (q, p, mbclen);
+ q += mbclen;
+ p += mbclen;
+ continue;
+ }
+
+ if (*p == '\\' && p+1 < bufend && bracket_state == 0)
+ switch (*++p)
+ {
+#if defined __STDC__ && __STDC__-0
+ case 'a': *q++ = '\a'; p++; continue;
+#else /* Not STDC; we'll just assume ASCII */
+ case 'a': *q++ = '\007'; p++; continue;
+#endif
+ /* case 'b': *q++ = '\b'; p++; continue; --- conflicts with \b RE */
+ case 'f': *q++ = '\f'; p++; continue;
+ case '\n': /*fall through */
+ case 'n': *q++ = '\n'; p++; continue;
+ case 'r': *q++ = '\r'; p++; continue;
+ case 't': *q++ = '\t'; p++; continue;
+ case 'v': *q++ = '\v'; p++; continue;
+
+ case 'd': /* decimal byte */
+ base = 10;
+ goto convert;
+
+ case 'x': /* hexadecimal byte */
+ base = 16;
+ goto convert;
+
+#ifdef REG_PERL
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7':
+ if ((extended_regexp_flags & REG_PERL)
+ && p+1 < bufend
+ && p[1] >= '0' && p[1] <= '9')
+ {
+ base = 8;
+ goto convert;
+ }
+ else
+ {
+ /* we just pass the \ up one level for interpretation */
+ if (buftype != TEXT_BUFFER)
+ *q++ = '\\';
+ }
+
+ continue;
+
+ case 'o': /* octal byte */
+ if (!(extended_regexp_flags & REG_PERL))
+ {
+ base = 8;
+ goto convert;
+ }
+ else
+ {
+ /* we just pass the \ up one level for interpretation */
+ if (buftype != TEXT_BUFFER)
+ *q++ = '\\';
+ }
+
+ continue;
+#else
+ case 'o': /* octal byte */
+ base = 8;
+#endif
+convert:
+ p = convert_number(&ch, p, bufend, base);
+
+ /* for an ampersand in a replacement, pass the \ up one level */
+ if (buftype == TEXT_REPLACEMENT && ch == '&')
+ *q++ = '\\';
+ *q++ = ch;
+ continue;
+
+ case 'c':
+ if (++p < bufend)
+ {
+ *q++ = toupper((unsigned char) *p) ^ 0x40;
+ p++;
+ continue;
+ }
+ else
+ {
+ /* we just pass the \ up one level for interpretation */
+ if (buftype != TEXT_BUFFER)
+ *q++ = '\\';
+ continue;
+ }
+
+ default:
+ /* we just pass the \ up one level for interpretation */
+ if (buftype != TEXT_BUFFER)
+ *q++ = '\\';
+ break;
+ }
+ else if (buftype == TEXT_REGEX && posixicity != POSIXLY_EXTENDED)
+ switch (*p)
+ {
+ case '[':
+ if (!bracket_state)
+ bracket_state = -1;
+ break;
+
+ case ':':
+ case '.':
+ case '=':
+ if (bracket_state == -1 && p[-1] == '[')
+ bracket_state = *p;
+ break;
+
+ case ']':
+ if (bracket_state == 0)
+ ;
+ else if (bracket_state == -1)
+ bracket_state = 0;
+ else if (p[-2] != bracket_state && p[-1] == bracket_state)
+ bracket_state = -1;
+ break;
+ }
+
+ *q++ = *p++;
+ }
+ return (size_t)(q - buf);
+}
+
+
+/* `str' is a string (from the command line) that contains a sed command.
+ Compile the command, and add it to the end of `cur_program'. */
+struct vector *
+compile_string(cur_program, str, len)
+ struct vector *cur_program;
+ char *str;
+ size_t len;
+{
+ static countT string_expr_count = 0;
+ struct vector *ret;
+
+ prog.file = NULL;
+ prog.base = (unsigned char *)str;
+ prog.cur = prog.base;
+ prog.end = prog.cur + len;
+
+ cur_input.line = 0;
+ cur_input.name = NULL;
+ cur_input.string_expr_count = ++string_expr_count;
+
+ ret = compile_program(cur_program);
+ prog.base = NULL;
+ prog.cur = NULL;
+ prog.end = NULL;
+
+ first_script = false;
+ return ret;
+}
+
+/* `cmdfile' is the name of a file containing sed commands.
+ Read them in and add them to the end of `cur_program'.
+ */
+struct vector *
+compile_file(cur_program, cmdfile)
+ struct vector *cur_program;
+ const char *cmdfile;
+{
+ struct vector *ret;
+
+ prog.file = stdin;
+ if (cmdfile[0] != '-' || cmdfile[1] != '\0')
+ {
+#ifdef HAVE_FOPEN_RT
+ prog.file = ck_fopen(cmdfile, "rt", true);
+#else
+ prog.file = ck_fopen(cmdfile, "r", true);
+#endif
+ }
+
+ cur_input.line = 1;
+ cur_input.name = cmdfile;
+ cur_input.string_expr_count = 0;
+
+ ret = compile_program(cur_program);
+ if (prog.file != stdin)
+ ck_fclose(prog.file);
+ prog.file = NULL;
+
+ first_script = false;
+ return ret;
+}
+
+/* Make any checks which require the whole program to have been read.
+ In particular: this backpatches the jump targets.
+ Any cleanup which can be done after these checks is done here also. */
+void
+check_final_program(program)
+ struct vector *program;
+{
+ struct sed_label *go;
+ struct sed_label *lbl;
+
+ /* do all "{"s have a corresponding "}"? */
+ if (blocks)
+ {
+ /* update info for error reporting: */
+ memcpy(&cur_input, &blocks->err_info, sizeof (cur_input));
+ bad_prog(_(EXCESS_OPEN_BRACE));
+ }
+
+ /* was the final command an unterminated a/c/i command? */
+ if (pending_text)
+ {
+ old_text_buf->text_length = size_buffer(pending_text);
+ if (old_text_buf->text_length)
+ old_text_buf->text = MEMDUP(get_buffer(pending_text),
+ old_text_buf->text_length, char);
+ free_buffer(pending_text);
+ pending_text = NULL;
+ }
+
+ for (go = jumps; go; go = release_label(go))
+ {
+ for (lbl = labels; lbl; lbl = lbl->next)
+ if (strcmp(lbl->name, go->name) == 0)
+ break;
+ if (lbl)
+ {
+ program->v[go->v_index].x.jump_index = lbl->v_index;
+ }
+ else
+ {
+ if (*go->name)
+ panic(_("can't find label for jump to `%s'"), go->name);
+ program->v[go->v_index].x.jump_index = program->v_length;
+ }
+ }
+ jumps = NULL;
+
+ for (lbl = labels; lbl; lbl = release_label(lbl))
+ ;
+ labels = NULL;
+
+ /* There is no longer a need to track file names: */
+ {
+ struct output *p;
+
+ for (p=file_read; p; p=p->link)
+ if (p->name)
+ {
+ free(p->name);
+ p->name = NULL;
+ }
+
+ for (p=file_write; p; p=p->link)
+ if (p->name)
+ {
+ free(p->name);
+ p->name = NULL;
+ }
+ }
+}
+
+/* Rewind all resources which were allocated in this module. */
+void
+rewind_read_files()
+{
+ struct output *p;
+
+ for (p=file_read; p; p=p->link)
+ if (p->fp)
+ rewind(p->fp);
+}
+
+/* Release all resources which were allocated in this module. */
+void
+finish_program(program)
+ struct vector *program;
+{
+ /* close all files... */
+ {
+ struct output *p, *q;
+
+ for (p=file_read; p; p=q)
+ {
+ if (p->fp)
+ ck_fclose(p->fp);
+ q = p->link;
+#if 0
+ /* We use obstacks. */
+ free(p);
+#endif
+ }
+
+ for (p=file_write; p; p=q)
+ {
+ if (p->fp)
+ ck_fclose(p->fp);
+ q = p->link;
+#if 0
+ /* We use obstacks. */
+ free(p);
+#endif
+ }
+ file_read = file_write = NULL;
+ }
+
+#ifdef DEBUG_LEAKS
+ obstack_free (&obs, NULL);
+#endif /*DEBUG_LEAKS*/
+}