summary | refs | log | tree | commit | diff
path: root/toke.c
diff options
context:
space:
mode:
Diffstat (limited to 'toke.c')
-rw-r--r-- toke.c 1335
1 files changed, 1335 insertions, 0 deletions
diff --git a/toke.c b/toke.c
new file mode 100644
index 0000000000..35be332ea8
--- /dev/null
+++ b/toke.c
@@ -0,0 +1,1335 @@
+/* $Header: toke.c,v 2.0.1.1 88/06/28 16:39:50 root Exp $
+ *
+ * $Log: toke.c,v $
+ * Revision 2.0.1.1 88/06/28 16:39:50 root
+ * patch1: tr/x/y/ can dump core if y is shorter than x
+ *
+ * Revision 2.0 88/06/05 00:11:16 root
+ * Baseline version 2.0.
+ *
+ */
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "perly.h"
+/*
+ * Token-return helpers for yylex().
+ *
+ * CLINE lowers cmdline to the current line when line is the smaller value.
+ * Every other macro expands to a complete return statement: it copies the
+ * local scan pointer s back into bufptr and returns the token code as an int.
+ *
+ *   RETURN(tok)     leaves expectterm untouched
+ *   OPERATOR(tok)   sets expectterm to TRUE
+ *   TERM(tok)       applies CLINE and sets expectterm to FALSE
+ *   UNI(f)          stores f in yylval.ival, expectterm TRUE,  returns UNIOP
+ *   FTST(f)         stores f in yylval.ival, expectterm TRUE,  returns FILETEST
+ *   LOOPX(f)        stores f in yylval.ival, expectterm FALSE, returns LOOPEX
+ *   FUN0..FUN3(f)   store f in yylval.ival, expectterm FALSE, return FUNC0..FUNC3
+ *   SFUN(f)         stores f in yylval.ival, expectterm FALSE, returns STABFUN
+ *   LFUN(f)         stores f in yylval.ival, expectterm FALSE, returns LVALFUN
+ *
+ * Because each expansion contains a return and refers to s, these macros can
+ * only be used inside a function that has a local char *s, as yylex() does.
+ */
+#define CLINE (cmdline = (line < cmdline ? line : cmdline))
+
+#define RETURN(retval) return (bufptr = s,(int)retval)
+#define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
+#define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
+#define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
+#define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s,(int)UNIOP)
+#define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
+#define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
+#define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
+#define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
+#define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
+#define SFUN(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)STABFUN)
+#define LFUN(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LVALFUN)
+/*
+ * yylex -- the lexical scanner.
+ *
+ * Resumes scanning at bufptr, skips blanks, and returns the integer code of
+ * the next token; any associated value is left in yylval.  Single-character
+ * tokens are returned as the character itself.  Returns 0 when there is no
+ * more input.  A character that starts no token is reported on stderr and
+ * skipped.
+ *
+ * Input is consumed one buffer at a time from linestr: when the scanner
+ * reaches the terminating NUL it refills linestr from rsfp with str_gets().
+ * With minus_n or minus_p set, the text "line: while (<>) {" is supplied
+ * before the first buffer and a closing "}" (preceded by "}continue{print;"
+ * for minus_p) after the last one.
+ *
+ * expectterm records whether a term may begin at the current position; it
+ * decides how '<', '/', '?', '.' and the word "x" are read, and it is set
+ * again by the return macro used for each token (OPERATOR sets it, TERM
+ * clears it, and so on).
+ *
+ * Words are collected into tokenbuf by SNARFWORD and compared against the
+ * keywords that share their first letter; a word that matches none of them
+ * is returned as WORD with a savestr() copy in yylval.cval.
+ *
+ * Two static flags persist across calls: firstline is TRUE until the first
+ * buffer has been set up, and in_format is set by the "format" keyword so
+ * that the next refill calls load_format() and returns FORMLIST.
+ */
+yylex()
+{
+ register char *s = bufptr;
+ register char *d;
+ register int tmp;
+ static bool in_format = FALSE;
+ static bool firstline = TRUE;
+
+ retry:
+#ifdef YYDEBUG
+ if (yydebug)
+ if (index(s,'\n'))
+ fprintf(stderr,"Tokener at %s",s);
+ else
+ fprintf(stderr,"Tokener at %s\n",s);
+#endif
+ switch (*s) {
+ default:
+ fprintf(stderr,
+ "Unrecognized character %c in file %s line %ld--ignoring.\n",
+ *s++,filename,(long)line);
+ goto retry;
+ case 0: /* NUL: current buffer exhausted, get more input */
+ s = str_get(linestr);
+ *s = '\0';
+ if (firstline && (minus_n || minus_p)) { /* -n/-p: open the implicit loop first */
+ firstline = FALSE;
+ str_set(linestr,"line: while (<>) {");
+ if (minus_a)
+ str_cat(linestr,"@F=split(' ');");
+ s = str_get(linestr);
+ goto retry;
+ }
+ if (!rsfp) /* no input stream left: end of input */
+ RETURN(0);
+ if (in_format) {
+ yylval.formval = load_format(); /* leaves . in buffer */
+ in_format = FALSE;
+ s = str_get(linestr);
+ TERM(FORMLIST);
+ }
+ line++;
+ if ((s = str_gets(linestr, rsfp)) == Nullch) {
+ if (preprocess)
+ pclose(rsfp);
+ else if (rsfp != stdin)
+ fclose(rsfp);
+ rsfp = Nullfp;
+ if (minus_n || minus_p) { /* -n/-p: close the implicit loop */
+ str_set(linestr,minus_p ? "}continue{print;" : "");
+ str_cat(linestr,"}");
+ s = str_get(linestr);
+ goto retry;
+ }
+ s = str_get(linestr);
+ RETURN(0);
+ }
+#ifdef DEBUG
+ else if (firstline) {
+ char *showinput();
+ s = showinput();
+ }
+#endif
+ firstline = FALSE;
+ goto retry;
+ case ' ': case '\t': case '\f':
+ s++;
+ goto retry;
+ case '\n':
+ case '#':
+ if (preprocess && s == str_get(linestr) && /* "# <number> <file>" marker at start of buffer */
+ s[1] == ' ' && isdigit(s[2])) {
+ line = atoi(s+2)-1;
+ for (s += 2; isdigit(*s); s++) ;
+ while (*s && isspace(*s)) s++;
+ if (filename)
+ safefree(filename);
+ s[strlen(s)-1] = '\0'; /* wipe out newline */
+ if (*s == '"') {
+ s++;
+ s[strlen(s)-1] = '\0'; /* wipe out trailing quote */
+ }
+ if (*s)
+ filename = savestr(s);
+ else
+ filename = savestr(origfilename);
+ s = str_get(linestr);
+ }
+ if (in_eval) { /* step to the next line inside the same buffer */
+ while (*s && *s != '\n')
+ s++;
+ if (*s)
+ s++;
+ line++;
+ }
+ else
+ *s = '\0'; /* drop the rest of the line */
+ if (lex_newlines)
+ RETURN('\n');
+ goto retry;
+ case '-':
+ if (s[1] && isalpha(s[1]) && !isalpha(s[2])) { /* -X followed by a non-letter: file test */
+ s++;
+ switch (*s++) {
+ case 'r': FTST(O_FTEREAD);
+ case 'w': FTST(O_FTEWRITE);
+ case 'x': FTST(O_FTEEXEC);
+ case 'o': FTST(O_FTEOWNED);
+ case 'R': FTST(O_FTRREAD);
+ case 'W': FTST(O_FTRWRITE);
+ case 'X': FTST(O_FTREXEC);
+ case 'O': FTST(O_FTROWNED);
+ case 'e': FTST(O_FTIS);
+ case 'z': FTST(O_FTZERO);
+ case 's': FTST(O_FTSIZE);
+ case 'f': FTST(O_FTFILE);
+ case 'd': FTST(O_FTDIR);
+ case 'l': FTST(O_FTLINK);
+ case 'p': FTST(O_FTPIPE);
+ case 'S': FTST(O_FTSOCK);
+ case 'u': FTST(O_FTSUID);
+ case 'g': FTST(O_FTSGID);
+ case 'k': FTST(O_FTSVTX);
+ case 'b': FTST(O_FTBLK);
+ case 'c': FTST(O_FTCHR);
+ case 't': FTST(O_FTTTY);
+ case 'T': FTST(O_FTTEXT);
+ case 'B': FTST(O_FTBINARY);
+ default:
+ s -= 2;
+ break;
+ }
+ }
+ /*FALL THROUGH*/
+ case '+':
+ if (s[1] == *s) { /* doubled sign: ++ or -- */
+ s++;
+ if (*s++ == '+')
+ RETURN(INC);
+ else
+ RETURN(DEC);
+ }
+ /* FALL THROUGH */
+ case '*':
+ case '%':
+ case '^':
+ case '~':
+ case '(':
+ case ',':
+ case ':':
+ case '[':
+ tmp = *s++;
+ OPERATOR(tmp);
+ case '{':
+ tmp = *s++;
+ if (isspace(*s) || *s == '#')
+ cmdline = NOLINE; /* invalidate current command line number */
+ OPERATOR(tmp);
+ case ';':
+ if (line < cmdline)
+ cmdline = line;
+ tmp = *s++;
+ OPERATOR(tmp);
+ case ')':
+ case ']':
+ tmp = *s++;
+ TERM(tmp);
+ case '}':
+ tmp = *s++;
+ for (d = s; *d == ' ' || *d == '\t'; d++) ;
+ if (*d == '\n' || *d == '#')
+ OPERATOR(tmp); /* block end */
+ else
+ TERM(tmp); /* associative array end */
+ case '&':
+ s++;
+ tmp = *s++;
+ if (tmp == '&')
+ OPERATOR(ANDAND);
+ s--;
+ OPERATOR('&');
+ case '|':
+ s++;
+ tmp = *s++;
+ if (tmp == '|')
+ OPERATOR(OROR);
+ s--;
+ OPERATOR('|');
+ case '=':
+ s++;
+ tmp = *s++;
+ if (tmp == '=')
+ OPERATOR(EQ);
+ if (tmp == '~')
+ OPERATOR(MATCH);
+ s--;
+ OPERATOR('=');
+ case '!':
+ s++;
+ tmp = *s++;
+ if (tmp == '=')
+ OPERATOR(NE);
+ if (tmp == '~')
+ OPERATOR(NMATCH);
+ s--;
+ OPERATOR('!');
+ case '<':
+ if (expectterm) { /* term expected: hand the <...> form to scanstr */
+ s = scanstr(s);
+ TERM(RSTRING);
+ }
+ s++;
+ tmp = *s++;
+ if (tmp == '<')
+ OPERATOR(LS);
+ if (tmp == '=')
+ OPERATOR(LE);
+ s--;
+ OPERATOR('<');
+ case '>':
+ s++;
+ tmp = *s++;
+ if (tmp == '>')
+ OPERATOR(RS);
+ if (tmp == '=')
+ OPERATOR(GE);
+ s--;
+ OPERATOR('>');
+
+#define SNARFWORD \
+ d = tokenbuf; \
+ while (isalpha(*s) || isdigit(*s) || *s == '_') \
+ *d++ = *s++; \
+ *d = '\0'; \
+ d = tokenbuf;
+
+ case '$':
+ if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
+ s++;
+ s = scanreg(s,tokenbuf);
+ yylval.stabval = aadd(stabent(tokenbuf,TRUE));
+ TERM(ARYLEN);
+ }
+ s = scanreg(s,tokenbuf);
+ yylval.stabval = stabent(tokenbuf,TRUE);
+ TERM(REG);
+
+ case '@':
+ s = scanreg(s,tokenbuf);
+ yylval.stabval = aadd(stabent(tokenbuf,TRUE));
+ TERM(ARY);
+
+ case '/': /* may either be division or pattern */
+ case '?': /* may either be conditional or pattern */
+ if (expectterm) { /* term expected: it starts a pattern */
+ s = scanpat(s);
+ TERM(PATTERN);
+ }
+ tmp = *s++;
+ OPERATOR(tmp);
+
+ case '.':
+ if (!expectterm || !isdigit(s[1])) { /* not the start of a number such as .5 */
+ s++;
+ tmp = *s++;
+ if (tmp == '.')
+ OPERATOR(DOTDOT);
+ s--;
+ OPERATOR('.');
+ }
+ /* FALL THROUGH */
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '\'': case '"': case '`':
+ s = scanstr(s);
+ TERM(RSTRING);
+
+ case '_':
+ SNARFWORD;
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'a': case 'A':
+ SNARFWORD;
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'b': case 'B':
+ SNARFWORD;
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'c': case 'C':
+ SNARFWORD;
+ if (strEQ(d,"continue"))
+ OPERATOR(CONTINUE);
+ if (strEQ(d,"chdir"))
+ UNI(O_CHDIR);
+ if (strEQ(d,"close"))
+ OPERATOR(CLOSE);
+ if (strEQ(d,"crypt"))
+ FUN2(O_CRYPT);
+ if (strEQ(d,"chop"))
+ LFUN(O_CHOP);
+ if (strEQ(d,"chmod")) {
+ yylval.ival = O_CHMOD;
+ OPERATOR(LISTOP);
+ }
+ if (strEQ(d,"chown")) {
+ yylval.ival = O_CHOWN;
+ OPERATOR(LISTOP);
+ }
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'd': case 'D':
+ SNARFWORD;
+ if (strEQ(d,"do"))
+ OPERATOR(DO);
+ if (strEQ(d,"die"))
+ UNI(O_DIE);
+ if (strEQ(d,"delete"))
+ OPERATOR(DELETE);
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'e': case 'E':
+ SNARFWORD;
+ if (strEQ(d,"else"))
+ OPERATOR(ELSE);
+ if (strEQ(d,"elsif")) {
+ yylval.ival = line;
+ OPERATOR(ELSIF);
+ }
+ if (strEQ(d,"eq") || strEQ(d,"EQ"))
+ OPERATOR(SEQ);
+ if (strEQ(d,"exit"))
+ UNI(O_EXIT);
+ if (strEQ(d,"eval")) {
+ allstabs = TRUE; /* must initialize everything since */
+ UNI(O_EVAL); /* we don't know what will be used */
+ }
+ if (strEQ(d,"eof"))
+ TERM(FEOF);
+ if (strEQ(d,"exp"))
+ FUN1(O_EXP);
+ if (strEQ(d,"each"))
+ SFUN(O_EACH);
+ if (strEQ(d,"exec")) {
+ yylval.ival = O_EXEC;
+ OPERATOR(LISTOP);
+ }
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'f': case 'F':
+ SNARFWORD;
+ if (strEQ(d,"for"))
+ OPERATOR(FOR);
+ if (strEQ(d,"foreach"))
+ OPERATOR(FOR);
+ if (strEQ(d,"format")) {
+ in_format = TRUE;
+ OPERATOR(FORMAT);
+ }
+ if (strEQ(d,"fork"))
+ FUN0(O_FORK);
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'g': case 'G':
+ SNARFWORD;
+ if (strEQ(d,"gt") || strEQ(d,"GT"))
+ OPERATOR(SGT);
+ if (strEQ(d,"ge") || strEQ(d,"GE"))
+ OPERATOR(SGE);
+ if (strEQ(d,"goto"))
+ LOOPX(O_GOTO);
+ if (strEQ(d,"gmtime"))
+ FUN1(O_GMTIME);
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'h': case 'H':
+ SNARFWORD;
+ if (strEQ(d,"hex"))
+ FUN1(O_HEX);
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'i': case 'I':
+ SNARFWORD;
+ if (strEQ(d,"if")) {
+ yylval.ival = line;
+ OPERATOR(IF);
+ }
+ if (strEQ(d,"index"))
+ FUN2(O_INDEX);
+ if (strEQ(d,"int"))
+ FUN1(O_INT);
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'j': case 'J':
+ SNARFWORD;
+ if (strEQ(d,"join"))
+ OPERATOR(JOIN);
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'k': case 'K':
+ SNARFWORD;
+ if (strEQ(d,"keys"))
+ SFUN(O_KEYS);
+ if (strEQ(d,"kill")) {
+ yylval.ival = O_KILL;
+ OPERATOR(LISTOP);
+ }
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'l': case 'L':
+ SNARFWORD;
+ if (strEQ(d,"last"))
+ LOOPX(O_LAST);
+ if (strEQ(d,"local"))
+ OPERATOR(LOCAL);
+ if (strEQ(d,"length"))
+ FUN1(O_LENGTH);
+ if (strEQ(d,"lt") || strEQ(d,"LT"))
+ OPERATOR(SLT);
+ if (strEQ(d,"le") || strEQ(d,"LE"))
+ OPERATOR(SLE);
+ if (strEQ(d,"localtime"))
+ FUN1(O_LOCALTIME);
+ if (strEQ(d,"log"))
+ FUN1(O_LOG);
+ if (strEQ(d,"link"))
+ FUN2(O_LINK);
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'm': case 'M':
+ SNARFWORD;
+ if (strEQ(d,"m")) {
+ s = scanpat(s-1); /* back up to the 'm' for scanpat */
+ TERM(PATTERN);
+ }
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'n': case 'N':
+ SNARFWORD;
+ if (strEQ(d,"next"))
+ LOOPX(O_NEXT);
+ if (strEQ(d,"ne") || strEQ(d,"NE"))
+ OPERATOR(SNE);
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'o': case 'O':
+ SNARFWORD;
+ if (strEQ(d,"open"))
+ OPERATOR(OPEN);
+ if (strEQ(d,"ord"))
+ FUN1(O_ORD);
+ if (strEQ(d,"oct"))
+ FUN1(O_OCT);
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'p': case 'P':
+ SNARFWORD;
+ if (strEQ(d,"print")) {
+ yylval.ival = O_PRINT;
+ OPERATOR(LISTOP);
+ }
+ if (strEQ(d,"printf")) {
+ yylval.ival = O_PRTF;
+ OPERATOR(LISTOP);
+ }
+ if (strEQ(d,"push")) {
+ yylval.ival = O_PUSH;
+ OPERATOR(PUSH);
+ }
+ if (strEQ(d,"pop"))
+ OPERATOR(POP);
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'q': case 'Q':
+ SNARFWORD;
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'r': case 'R':
+ SNARFWORD;
+ if (strEQ(d,"reset"))
+ UNI(O_RESET);
+ if (strEQ(d,"redo"))
+ LOOPX(O_REDO);
+ if (strEQ(d,"rename"))
+ FUN2(O_RENAME);
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 's': case 'S':
+ SNARFWORD;
+ if (strEQ(d,"s")) {
+ s = scansubst(s);
+ TERM(SUBST);
+ }
+ if (strEQ(d,"shift"))
+ TERM(SHIFT);
+ if (strEQ(d,"split"))
+ TERM(SPLIT);
+ if (strEQ(d,"substr"))
+ FUN3(O_SUBSTR);
+ if (strEQ(d,"sprintf"))
+ OPERATOR(SPRINTF);
+ if (strEQ(d,"sub"))
+ OPERATOR(SUB);
+ if (strEQ(d,"select"))
+ OPERATOR(SELECT);
+ if (strEQ(d,"seek"))
+ OPERATOR(SEEK);
+ if (strEQ(d,"stat"))
+ OPERATOR(STAT);
+ if (strEQ(d,"study")) {
+ sawstudy++;
+ LFUN(O_STUDY);
+ }
+ if (strEQ(d,"sqrt"))
+ FUN1(O_SQRT);
+ if (strEQ(d,"sleep"))
+ UNI(O_SLEEP);
+ if (strEQ(d,"system")) {
+ yylval.ival = O_SYSTEM;
+ OPERATOR(LISTOP);
+ }
+ if (strEQ(d,"symlink"))
+ FUN2(O_SYMLINK);
+ if (strEQ(d,"sort")) {
+ yylval.ival = O_SORT;
+ OPERATOR(LISTOP);
+ }
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 't': case 'T':
+ SNARFWORD;
+ if (strEQ(d,"tr")) {
+ s = scantrans(s);
+ TERM(TRANS);
+ }
+ if (strEQ(d,"tell"))
+ TERM(TELL);
+ if (strEQ(d,"time"))
+ FUN0(O_TIME);
+ if (strEQ(d,"times"))
+ FUN0(O_TMS);
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'u': case 'U':
+ SNARFWORD;
+ if (strEQ(d,"using"))
+ OPERATOR(USING);
+ if (strEQ(d,"until")) {
+ yylval.ival = line;
+ OPERATOR(UNTIL);
+ }
+ if (strEQ(d,"unless")) {
+ yylval.ival = line;
+ OPERATOR(UNLESS);
+ }
+ if (strEQ(d,"umask"))
+ FUN1(O_UMASK);
+ if (strEQ(d,"unshift")) {
+ yylval.ival = O_UNSHIFT;
+ OPERATOR(PUSH);
+ }
+ if (strEQ(d,"unlink")) {
+ yylval.ival = O_UNLINK;
+ OPERATOR(LISTOP);
+ }
+ if (strEQ(d,"utime")) {
+ yylval.ival = O_UTIME;
+ OPERATOR(LISTOP);
+ }
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'v': case 'V':
+ SNARFWORD;
+ if (strEQ(d,"values"))
+ SFUN(O_VALUES);
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'w': case 'W':
+ SNARFWORD;
+ if (strEQ(d,"write"))
+ TERM(WRITE);
+ if (strEQ(d,"while")) {
+ yylval.ival = line;
+ OPERATOR(WHILE);
+ }
+ if (strEQ(d,"wait"))
+ FUN0(O_WAIT);
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'x': case 'X':
+ SNARFWORD;
+ if (!expectterm && strEQ(d,"x")) /* "x" is an operator only where no term is expected */
+ OPERATOR('x');
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'y': case 'Y':
+ SNARFWORD;
+ if (strEQ(d,"y")) {
+ s = scantrans(s);
+ TERM(TRANS);
+ }
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ case 'z': case 'Z':
+ SNARFWORD;
+ yylval.cval = savestr(d);
+ OPERATOR(WORD);
+ }
+}
+
+/*
+ * scanreg -- copy the variable name that follows a sigil into dest.
+ *
+ * s    points at the sigil character (it is skipped unexamined).
+ * dest receives the NUL-terminated name; the caller supplies the storage.
+ *
+ * The name is either a run of digits/underscores (when it starts with a
+ * digit) or a run of letters, digits and underscores.  If neither is present
+ * the next single character is taken as the name, except that "{...}" yields
+ * the text between the braces.  A name beginning with '^' that is followed by
+ * a non-space character is replaced by that character masked with 31.
+ *
+ * Returns a pointer to the first character after the name.  The pointer never
+ * moves past the terminating NUL of the input: if the sigil (or a lone '^')
+ * is the last character, scanning stops there.
+ */
+char *
+scanreg(s,dest)
+register char *s;
+char *dest;
+{
+    register char *d;
+
+    s++;                                /* step over the sigil */
+    d = dest;
+    if (isdigit(*s)) {
+        while (isdigit(*s) || *s == '_')
+            *d++ = *s++;
+    }
+    else {
+        while (isalpha(*s) || isdigit(*s) || *s == '_')
+            *d++ = *s++;
+    }
+    *d = '\0';
+    d = dest;
+    if (!*d && *s) {                    /* no word: take one char, but never the NUL */
+        *d = *s++;
+        if (*d == '{') {
+            d = dest;
+            while (*s && *s != '}')
+                *d++ = *s++;
+            *d = '\0';
+            d = dest;
+            if (*s)                     /* skip the closing brace if present */
+                s++;
+        }
+        else
+            d[1] = '\0';
+    }
+    if (*d == '^' && *s && !isspace(*s))
+        *d = *s++ & 31;                 /* ^X form: fold to a control character */
+    return s;
+}
+
+/*
+ * scanconst -- extract the constant leading text of a pattern.
+ *
+ * string is the pattern source.  Returns a new STR holding the literal
+ * characters that precede the first regular-expression construct, with the
+ * \n \t \f \r escapes converted and other backslashes removed, or Nullstr
+ * when the pattern contains '|' anywhere or no literal prefix remains.
+ *
+ * The copy is truncated at '.', '[', '$', '(', ')', at a backslash followed
+ * by one of "wWbB0123456789sSdD", and at any character that is followed by
+ * '*', '+' or '?'.
+ */
+STR *
+scanconst(string)
+char *string;
+{
+    register STR *retstr;
+    register char *t;
+    register char *d;
+    register char *p;
+
+    if (index(string,'|')) {
+        return Nullstr;
+    }
+    retstr = str_make(string);
+    t = str_get(retstr);
+    /* NOTE(review): stores 100 through a long alias of str_nval; the meaning
+     * of the value is not visible from this file -- confirm before changing. */
+    *(long*)&retstr->str_nval = 100;
+    for (d=t; *d; ) {
+        switch (*d) {
+        case '.': case '[': case '$': case '(': case ')': case '|':
+            *d = '\0';
+            break;
+        case '\\':
+            if (index("wWbB0123456789sSdD",d[1])) {
+                *d = '\0';
+                break;
+            }
+            /* Delete the backslash by shifting the tail left one place.
+             * Done by hand because source and destination overlap, which
+             * strcpy() does not permit. */
+            for (p = d; (*p = p[1]) != '\0'; p++)
+                ;
+            switch(*d) {
+            case 'n':
+                *d = '\n';
+                break;
+            case 't':
+                *d = '\t';
+                break;
+            case 'f':
+                *d = '\f';
+                break;
+            case 'r':
+                *d = '\r';
+                break;
+            }
+            /* FALL THROUGH */
+        default:
+            if (d[1] == '*' || d[1] == '+' || d[1] == '?') {
+                *d = '\0';      /* a quantified character is not constant */
+                break;
+            }
+            d++;
+        }
+    }
+    if (!*t) {
+        str_free(retstr);
+        return Nullstr;
+    }
+    retstr->str_cur = strlen(retstr->str_ptr);
+    return retstr;
+}
+
+/*
+ * scanpat -- scan a match pattern and build its match node.
+ *
+ * s points at the introducing character: 'm' (followed by the delimiter),
+ * '/' or '?'.  A '?' delimiter sets SPAT_ONCE; a trailing 'i' sets SPAT_FOLD.
+ * Any other introducing character is a fatal "panic: scanpat".
+ *
+ * The pattern text is copied into tokenbuf.  If it contains a '$' that is
+ * not preceded by a backslash and is followed by something other than '|',
+ * the text is kept as an A_DOUBLE string in spat_runtime and compilation is
+ * skipped; otherwise a constant prefix is looked for with scanconst() (only
+ * when not case-folding), the pattern is compiled with regcomp() and
+ * hoistmust() is applied.
+ *
+ * A new SPAT is allocated and linked at the head of spat_root.  The match
+ * node is left in yylval.arg.  Returns a pointer just past the pattern and
+ * its optional 'i' flag.
+ */
+char *
+scanpat(s)
+register char *s;
+{
+    register SPAT *spat = (SPAT *) safemalloc(sizeof (SPAT));
+    register char *d;
+
+    bzero((char *)spat, sizeof(SPAT));
+    spat->spat_next = spat_root;        /* link into spat list */
+    spat_root = spat;
+
+    switch (*s++) {
+    case 'm':
+        s++;                            /* step over the delimiter; s[-1] names it below */
+        break;
+    case '/':
+        break;
+    case '?':
+        spat->spat_flags |= SPAT_ONCE;
+        break;
+    default:
+        fatal("panic: scanpat");
+    }
+    s = cpytill(tokenbuf,s,s[-1]);
+    if (!*s)
+        fatal("Search pattern not terminated");
+    s++;
+    if (*s == 'i') {
+        s++;
+        spat->spat_flags |= SPAT_FOLD;
+    }
+    for (d=tokenbuf; *d; d++) {
+        /* Only look at the preceding character when there is one, so the
+         * first position never reads in front of tokenbuf. */
+        if (*d == '$' && d[1] && (d == tokenbuf || d[-1] != '\\') &&
+          d[1] != '|') {
+            register ARG *arg;
+
+            spat->spat_runtime = arg = op_new(1);
+            arg->arg_type = O_ITEM;
+            arg[1].arg_type = A_DOUBLE;
+            arg[1].arg_ptr.arg_str = str_make(tokenbuf);
+            goto got_pat;               /* skip compiling for now */
+        }
+    }
+    if (!(spat->spat_flags & SPAT_FOLD)) {
+        if (*tokenbuf == '^') {
+            spat->spat_short = scanconst(tokenbuf+1);
+            if (spat->spat_short) {
+                spat->spat_slen = strlen(spat->spat_short->str_ptr);
+                if (spat->spat_slen == strlen(tokenbuf+1))
+                    spat->spat_flags |= SPAT_ALL;       /* whole pattern is constant */
+            }
+        }
+        else {
+            spat->spat_flags |= SPAT_SCANFIRST;
+            spat->spat_short = scanconst(tokenbuf);
+            if (spat->spat_short) {
+                spat->spat_slen = strlen(spat->spat_short->str_ptr);
+                if (spat->spat_slen == strlen(tokenbuf))
+                    spat->spat_flags |= SPAT_ALL;       /* whole pattern is constant */
+            }
+        }
+    }
+    spat->spat_regexp = regcomp(tokenbuf,spat->spat_flags & SPAT_FOLD,1);
+    hoistmust(spat);
+  got_pat:
+    yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
+    return s;
+}
+
+/*
+ * scansubst -- scan a substitution (pattern, replacement, flags) and build
+ * its node.
+ *
+ * s points at the delimiter that opens the pattern.  The pattern is copied
+ * into tokenbuf up to the next delimiter.  If it contains a '$' that is not
+ * preceded by a backslash and is followed by something other than '|', it is
+ * kept as an A_DOUBLE string in spat_runtime and not compiled here.
+ * Otherwise a constant prefix is looked for with scanconst() and a copy of
+ * the pattern is saved, because tokenbuf is handed to scanstr() next.
+ *
+ * The replacement is read with scanstr().  Flags: 'g' clears SPAT_ONCE and
+ * 'i' sets SPAT_FOLD; case-folding discards the constant-prefix
+ * optimisation.
+ *
+ * A new SPAT is allocated and linked at the head of spat_root.  The
+ * substitution node is left in yylval.arg.  Returns a pointer just past the
+ * flags.
+ */
+char *
+scansubst(s)
+register char *s;
+{
+    register SPAT *spat = (SPAT *) safemalloc(sizeof (SPAT));
+    register char *d;
+
+    bzero((char *)spat, sizeof(SPAT));
+    spat->spat_next = spat_root;        /* link into spat list */
+    spat_root = spat;
+
+    s = cpytill(tokenbuf,s+1,*s);
+    if (!*s)
+        fatal("Substitution pattern not terminated");
+    for (d=tokenbuf; *d; d++) {
+        /* Only look at the preceding character when there is one, so the
+         * first position never reads in front of tokenbuf. */
+        if (*d == '$' && d[1] && (d == tokenbuf || d[-1] != '\\') &&
+          d[1] != '|') {
+            register ARG *arg;
+
+            spat->spat_runtime = arg = op_new(1);
+            arg->arg_type = O_ITEM;
+            arg[1].arg_type = A_DOUBLE;
+            arg[1].arg_ptr.arg_str = str_make(tokenbuf);
+            goto get_repl;              /* skip compiling for now */
+        }
+    }
+    if (*tokenbuf == '^') {
+        spat->spat_short = scanconst(tokenbuf+1);
+        if (spat->spat_short)
+            spat->spat_slen = strlen(spat->spat_short->str_ptr);
+    }
+    else {
+        spat->spat_flags |= SPAT_SCANFIRST;
+        spat->spat_short = scanconst(tokenbuf);
+        if (spat->spat_short)
+            spat->spat_slen = strlen(spat->spat_short->str_ptr);
+    }
+    d = savestr(tokenbuf);              /* keep the pattern until the flags are known */
+get_repl:
+    s = scanstr(s);
+    if (!*s)
+        fatal("Substitution replacement not terminated");
+    spat->spat_repl = yylval.arg;
+    spat->spat_flags |= SPAT_ONCE;
+    while (*s == 'g' || *s == 'i') {
+        if (*s == 'g') {
+            s++;
+            spat->spat_flags &= ~SPAT_ONCE;
+        }
+        if (*s == 'i') {
+            s++;
+            spat->spat_flags |= SPAT_FOLD;
+        }
+    }
+    if (!spat->spat_runtime) {          /* d is the saved copy only on this path */
+        spat->spat_regexp = regcomp(d, spat->spat_flags & SPAT_FOLD,1);
+        hoistmust(spat);
+        safefree(d);
+    }
+    if (spat->spat_flags & SPAT_FOLD) { /* Oops, disable optimization */
+        str_free(spat->spat_short);
+        spat->spat_short = Nullstr;
+        spat->spat_slen = 0;
+    }
+    yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
+    return s;
+}
+
+/*
+ * hoistmust -- choose between the pattern's own short string and the
+ * compiled regexp's regmust string, keeping whichever serves better as a
+ * quick pre-check.
+ *
+ * Nothing happens when the regexp has no regmust.  When both strings are
+ * equal, one of them is freed: spat_short if SPAT_SCANFIRST is set,
+ * otherwise regmust (and the function is done).  After that, regmust is
+ * moved into spat_short -- and SPAT_SCANFIRST set -- when there is no
+ * spat_short, or when SPAT_SCANFIRST is set and spat_short is the shorter
+ * of the two.
+ */
+hoistmust(spat)
+register SPAT *spat;
+{
+    if (!spat->spat_regexp->regmust)
+        return;                         /* no better short-circuit on offer */
+
+    if (spat->spat_short &&
+      strEQ(spat->spat_short->str_ptr,spat->spat_regexp->regmust->str_ptr)) {
+        if (!(spat->spat_flags & SPAT_SCANFIRST)) {
+            str_free(spat->spat_regexp->regmust);
+            spat->spat_regexp->regmust = Nullstr;
+            return;
+        }
+        str_free(spat->spat_short);
+        spat->spat_short = Nullstr;
+    }
+
+    if (spat->spat_short) {
+        /* an existing short string is kept unless regmust is longer */
+        if (!(spat->spat_flags & SPAT_SCANFIRST))
+            return;
+        if (!(spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur))
+            return;
+    }
+
+    /* promote the better string */
+    str_free(spat->spat_short);         /* ok if null */
+    spat->spat_short = spat->spat_regexp->regmust;
+    spat->spat_regexp->regmust = Nullstr;
+    spat->spat_flags |= SPAT_SCANFIRST;
+}
+
+/*
+ * expand_charset -- expand "a-z" style ranges in a character list.
+ *
+ * s is a NUL-terminated list in which "x-y" (with y present) stands for
+ * every character from x through y.  Characters are compared as values
+ * 0..255, so ranges behave the same whether char is signed or not; a range
+ * whose end is below its start expands to nothing.
+ *
+ * Returns a savestr() copy of the expanded list, which the caller frees.
+ * An expansion that does not fit the 512-byte work buffer is a fatal error
+ * instead of an overrun.
+ */
+char *
+expand_charset(s)
+register char *s;
+{
+    char t[512];
+    register char *d = t;
+    register char *end = t + sizeof(t) - 1;     /* last slot is kept for the NUL */
+    register int i;
+    register int last;
+
+    while (*s) {
+        if (s[1] == '-' && s[2]) {
+            last = s[2] & 0377;
+            for (i = s[0] & 0377; i <= last; i++) {
+                if (d >= end)
+                    goto toolong;
+                *d++ = i;
+            }
+            s += 3;
+        }
+        else {
+            if (d >= end)
+                goto toolong;
+            *d++ = *s++;
+        }
+    }
+    *d = '\0';
+    return savestr(t);
+
+  toolong:
+    /* fatal() is used as non-returning elsewhere in this file; the code
+     * below keeps the result well-formed even if it does return. */
+    fatal("Character set too long");
+    *d = '\0';
+    return savestr(t);
+}
+
+/*
+ * scantrans -- scan the two character lists of a translation and build its
+ * node.
+ *
+ * s points at the delimiter that opens the search list.  Both lists are read
+ * with scanstr() and expanded with expand_charset().  A 256-entry table is
+ * allocated, zeroed and attached to the O_TRANS node as arg[2]; entry c holds
+ * the replacement for character c.  An empty replacement list means "same as
+ * the search list"; a short one has its last character repeated.
+ *
+ * The node is left in yylval.arg.  Returns a pointer just past the
+ * replacement list.
+ */
+char *
+scantrans(s)
+register char *s;
+{
+    ARG *arg =
+        l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg,0));
+    register char *from;
+    register char *to;
+    register char *map = safemalloc(256);
+    register char *src;
+    register char *rep;
+    register int n;
+
+    arg[2].arg_type = A_NULL;
+    arg[2].arg_ptr.arg_cval = map;
+    n = 0;
+    while (n < 256)
+        map[n++] = 0;
+
+    /* search list */
+    s = scanstr(s);
+    if (!*s)
+        fatal("Translation pattern not terminated");
+    from = expand_charset(str_get(yylval.arg[1].arg_ptr.arg_str));
+    free_arg(yylval.arg);
+
+    /* replacement list; its opening delimiter is the one just consumed */
+    s = scanstr(s-1);
+    if (!*s)
+        fatal("Translation replacement not terminated");
+    to = expand_charset(str_get(yylval.arg[1].arg_ptr.arg_str));
+    free_arg(yylval.arg);
+    yylval.arg = arg;
+
+    if (!*to) {                         /* empty replacement: map onto itself */
+        safefree(to);
+        to = from;
+    }
+    src = from;
+    rep = to;
+    while (*src) {
+        if (!*rep)                      /* replacement exhausted: reuse its last char */
+            --rep;
+        map[*src & 0377] = *rep;
+        src++;
+        rep++;
+    }
+    if (to != from)
+        safefree(to);
+    safefree(from);
+    return s;
+}
+
+/*
+ * scanstr -- scan a literal term and build an O_ITEM node for it.
+ *
+ * s points at the first character of the literal.  What is scanned depends
+ * on that character:
+ *
+ *   0            octal, or hexadecimal after "0x"; stored as decimal text
+ *   1-9 and .    decimal number with optional fraction and exponent;
+ *                underscores between digits are dropped
+ *   '            single-quoted string (A_SINGLE), taken verbatim
+ *   "            double-quoted string (A_DOUBLE)
+ *   `            backtick string (A_BACKTICK)
+ *   <            <HANDLE> read, <$var> indirect read, or a glob when the text
+ *                between the brackets is not a plain word
+ *   other        that character is used as the delimiter of a substitution
+ *                or translation operand
+ *
+ * Delimited strings may continue over several input lines; further lines are
+ * read from rsfp, and end of file inside a string is fatal.  In interpolated
+ * strings the variables that are mentioned are created with stabent(),
+ * backslash escapes are converted, and a string without any variable
+ * reference is demoted from A_DOUBLE to A_SINGLE.
+ *
+ * The node is left in yylval.arg.  Returns a pointer just past the literal.
+ * The octal-escape and exponent tests check for the terminating NUL first,
+ * so a literal that ends exactly at the end of the text is not overrun.
+ */
+char *
+scanstr(s)
+register char *s;
+{
+    register char term;
+    register char *d;
+    register ARG *arg;
+    register bool makesingle = FALSE;
+    register STAB *stab;
+    char *leave = "\\$nrtfb0123456789"; /* which backslash sequences to keep */
+
+    arg = op_new(1);
+    yylval.arg = arg;
+    arg->arg_type = O_ITEM;
+
+    switch (*s) {
+    default:                            /* a substitution replacement */
+        arg[1].arg_type = A_DOUBLE;
+        makesingle = TRUE;              /* maybe disable runtime scanning */
+        term = *s;
+        if (term == '\'')
+            leave = Nullch;
+        goto snarf_it;
+    case '0':
+        {
+            long i;
+            int shift;                  /* bits per digit: 4 for hex, 3 for octal */
+
+            arg[1].arg_type = A_SINGLE;
+            if (s[1] == 'x') {
+                shift = 4;
+                s += 2;
+            }
+            else if (s[1] == '.')
+                goto decimal;
+            else
+                shift = 3;
+            i = 0;
+            for (;;) {
+                switch (*s) {
+                default:
+                    goto out;
+                case '8': case '9':
+                    if (shift != 4)
+                        fatal("Illegal octal digit");
+                    /* FALL THROUGH */
+                case '0': case '1': case '2': case '3': case '4':
+                case '5': case '6': case '7':
+                    i <<= shift;
+                    i += *s++ & 15;
+                    break;
+                case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+                case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+                    if (shift != 4)
+                        goto out;
+                    i <<= 4;
+                    i += (*s++ & 7) + 9;
+                    break;
+                }
+            }
+          out:
+            sprintf(tokenbuf,"%ld",i);
+            arg[1].arg_ptr.arg_str = str_make(tokenbuf);
+        }
+        break;
+    case '1': case '2': case '3': case '4': case '5':
+    case '6': case '7': case '8': case '9': case '.':
+      decimal:
+        arg[1].arg_type = A_SINGLE;
+        d = tokenbuf;
+        while (isdigit(*s) || *s == '_') {
+            if (*s == '_')
+                s++;
+            else
+                *d++ = *s++;
+        }
+        if (*s == '.' && index("0123456789eE",s[1])) {
+            *d++ = *s++;
+            while (isdigit(*s) || *s == '_') {
+                if (*s == '_')
+                    s++;
+                else
+                    *d++ = *s++;
+            }
+        }
+        /* index() also matches the terminating NUL, so test for it first */
+        if (*s && index("eE",*s) && s[1] && index("+-0123456789",s[1])) {
+            *d++ = *s++;
+            if (*s == '+' || *s == '-')
+                *d++ = *s++;
+            while (isdigit(*s))
+                *d++ = *s++;
+        }
+        *d = '\0';
+        arg[1].arg_ptr.arg_str = str_make(tokenbuf);
+        break;
+    case '\'':
+        arg[1].arg_type = A_SINGLE;
+        term = *s;
+        leave = Nullch;
+        goto snarf_it;
+
+    case '<':
+        d = tokenbuf;
+        s = cpytill(d,s+1,'>');
+        if (*s)
+            s++;
+        if (*d == '$') d++;
+        while (*d && (isalpha(*d) || isdigit(*d) || *d == '_')) d++;
+        if (*d) {                       /* not a plain word: treat as a glob */
+            d = tokenbuf;
+            arg[1].arg_type = A_GLOB;
+            d = savestr(d);
+            arg[1].arg_ptr.arg_stab = stab = genstab();
+            stab->stab_io = stio_new();
+            stab->stab_val = str_make(d);
+        }
+        else {
+            d = tokenbuf;
+            if (!*d)                    /* <> reads from ARGV */
+                strcpy(d,"ARGV");
+            if (*d == '$') {
+                arg[1].arg_type = A_INDREAD;
+                arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
+            }
+            else {
+                arg[1].arg_type = A_READ;
+                if (rsfp == stdin && strEQ(d,"stdin"))
+                    fatal("Can't get both program and data from <stdin>");
+                arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
+                arg[1].arg_ptr.arg_stab->stab_io = stio_new();
+                if (strEQ(d,"ARGV")) {
+                    aadd(arg[1].arg_ptr.arg_stab);
+                    arg[1].arg_ptr.arg_stab->stab_io->flags |=
+                      IOF_ARGV|IOF_START;
+                }
+            }
+        }
+        break;
+    case '"':
+        arg[1].arg_type = A_DOUBLE;
+        makesingle = TRUE;              /* maybe disable runtime scanning */
+        term = *s;
+        goto snarf_it;
+    case '`':
+        arg[1].arg_type = A_BACKTICK;
+        term = *s;
+      snarf_it:
+        {
+            STR *tmpstr;
+            int sqstart = line;         /* line the string began on, for the EOF message */
+            char *tmps;
+
+            tmpstr = str_new(strlen(s));
+            s = str_append_till(tmpstr,s+1,term,leave);
+            while (!*s) {               /* multiple line string? */
+                s = str_gets(linestr, rsfp);
+                if (!s) {
+                    line = sqstart;
+                    fatal("EOF in string");
+                }
+                line++;
+                s = str_append_till(tmpstr,s,term,leave);
+            }
+            s++;
+            if (term == '\'') {
+                arg[1].arg_ptr.arg_str = tmpstr;
+                break;
+            }
+            tmps = s;                   /* remember where the input resumes */
+            s = tmpstr->str_ptr;
+            while (*s) {                /* see if we can make SINGLE */
+                if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
+                  !index("`\"",term) )
+                    *s = '$';           /* grandfather \digit in subst */
+                if (*s == '$' && s[1] && s[1] != ')' && s[1] != '|') {
+                    makesingle = FALSE; /* force interpretation */
+                }
+                else if (*s == '\\' && s[1]) {
+                    s++;
+                }
+                s++;
+            }
+            s = d = tmpstr->str_ptr;    /* assuming shrinkage only */
+            while (*s) {
+                if (*s == '$' && s[1] && s[1] != ')' && s[1] != '|') {
+                    int len;
+
+                    len = scanreg(s,tokenbuf) - s;
+                    stabent(tokenbuf,TRUE);     /* make sure it's created */
+                    while (len--)
+                        *d++ = *s++;
+                    continue;
+                }
+                else if (*s == '\\' && s[1]) {
+                    s++;
+                    switch (*s) {
+                    default:
+                        if (!makesingle && (!leave || index(leave,*s)))
+                            *d++ = '\\';
+                        *d++ = *s++;
+                        continue;
+                    case '0': case '1': case '2': case '3':
+                    case '4': case '5': case '6': case '7':
+                        *d = *s++ - '0';
+                        /* index() also matches the terminating NUL, so
+                         * test for it before taking another digit */
+                        if (*s && index("01234567",*s)) {
+                            *d <<= 3;
+                            *d += *s++ - '0';
+                        }
+                        if (*s && index("01234567",*s)) {
+                            *d <<= 3;
+                            *d += *s++ - '0';
+                        }
+                        d++;
+                        continue;
+                    case 'b':
+                        *d++ = '\b';
+                        break;
+                    case 'n':
+                        *d++ = '\n';
+                        break;
+                    case 'r':
+                        *d++ = '\r';
+                        break;
+                    case 'f':
+                        *d++ = '\f';
+                        break;
+                    case 't':
+                        *d++ = '\t';
+                        break;
+                    }
+                    s++;
+                    continue;
+                }
+                *d++ = *s++;
+            }
+            *d = '\0';
+
+            if (arg[1].arg_type == A_DOUBLE && makesingle)
+                arg[1].arg_type = A_SINGLE;     /* now we can optimize on it */
+
+            tmpstr->str_cur = d - tmpstr->str_ptr;      /* XXX cheat */
+            arg[1].arg_ptr.arg_str = tmpstr;
+            s = tmps;
+            break;
+        }
+    }
+    return s;
+}
+
+/*
+ * load_format -- read the body of a format definition from rsfp.
+ *
+ * Lines are read into linestr until a line consisting of "." alone.  Lines
+ * starting with '#' are skipped.  Each picture line is split into FCMD
+ * records: the literal text before a field goes in f_pre/f_presize, and a
+ * field introduced by '@' or '^' ('^' sets FC_CHOP) gets its type from the
+ * following character: '*' F_LINES, '<' F_LEFT, '>' F_RIGHT, '|' F_CENTER,
+ * anything else F_LEFT.  A '~' in the literal text is blanked out and marks
+ * the last record of the line FC_NOBLANK.  Dots after a '^' field set
+ * FC_MORE.
+ *
+ * When a picture line contains fields, the next non-comment line supplies
+ * their values; it is tokenised by calling yylex() with lex_newlines set,
+ * and each REG or RSTRING is attached in order as f_expr of a field.
+ *
+ * Returns the head of the FCMD list, which is Nullfcmd for an empty format.
+ * A missing "." terminator is reported with yyerror() and whatever was read
+ * so far is returned.  bufptr is left pointing into linestr.
+ */
+FCMD *
+load_format()
+{
+    FCMD froot;
+    FCMD *flinebeg;
+    register FCMD *fprev = &froot;
+    register FCMD *fcmd;
+    register char *s;
+    register char *t;
+    register char tmpchar;
+    bool noblank;
+
+    froot.f_next = Nullfcmd;            /* an empty format must yield an empty list */
+
+    while ((s = str_gets(linestr,rsfp)) != Nullch) {
+        line++;
+        if (strEQ(s,".\n")) {
+            bufptr = s;
+            return froot.f_next;
+        }
+        if (*s == '#')
+            continue;
+        flinebeg = Nullfcmd;
+        noblank = FALSE;
+        while (*s) {
+            fcmd = (FCMD *)safemalloc(sizeof (FCMD));
+            bzero((char*)fcmd, sizeof (FCMD));
+            fprev->f_next = fcmd;
+            fprev = fcmd;
+            for (t=s; *t && *t != '@' && *t != '^'; t++) {
+                if (*t == '~') {
+                    noblank = TRUE;
+                    *t = ' ';
+                }
+            }
+            tmpchar = *t;               /* cut the line at the field to copy the literal text */
+            *t = '\0';
+            fcmd->f_pre = savestr(s);
+            fcmd->f_presize = strlen(s);
+            *t = tmpchar;
+            s = t;
+            if (!*s) {
+                if (noblank)
+                    fcmd->f_flags |= FC_NOBLANK;
+                break;
+            }
+            if (!flinebeg)
+                flinebeg = fcmd;        /* start values here */
+            if (*s++ == '^')
+                fcmd->f_flags |= FC_CHOP;       /* for doing text filling */
+            switch (*s) {
+            case '*':
+                fcmd->f_type = F_LINES;
+                *s = '\0';
+                break;
+            case '<':
+                fcmd->f_type = F_LEFT;
+                while (*s == '<')
+                    s++;
+                break;
+            case '>':
+                fcmd->f_type = F_RIGHT;
+                while (*s == '>')
+                    s++;
+                break;
+            case '|':
+                fcmd->f_type = F_CENTER;
+                while (*s == '|')
+                    s++;
+                break;
+            default:
+                fcmd->f_type = F_LEFT;
+                break;
+            }
+            if (fcmd->f_flags & FC_CHOP && *s == '.') {
+                fcmd->f_flags |= FC_MORE;
+                while (*s == '.')
+                    s++;
+            }
+            fcmd->f_size = s-t;         /* field width, counted from the '@' or '^' */
+        }
+        if (flinebeg) {
+          again:
+            if ((bufptr = str_gets(linestr ,rsfp)) == Nullch)
+                goto badform;
+            line++;
+            if (strEQ(bufptr,".\n")) {
+                yyerror("Missing values line");
+                return froot.f_next;
+            }
+            if (*bufptr == '#')
+                goto again;
+            lex_newlines = TRUE;
+            while (flinebeg || *bufptr) {
+                switch(yylex()) {
+                default:
+                    yyerror("Bad value in format");
+                    *bufptr = '\0';
+                    break;
+                case '\n':
+                    if (flinebeg)
+                        yyerror("Missing value in format");
+                    *bufptr = '\0';
+                    break;
+                case REG:
+                    yylval.arg = stab2arg(A_LVAL,yylval.stabval);
+                    /* FALL THROUGH */
+                case RSTRING:
+                    if (!flinebeg)
+                        yyerror("Extra value in format");
+                    else {
+                        flinebeg->f_expr = yylval.arg;
+                        do {            /* advance to the next record that has a field */
+                            flinebeg = flinebeg->f_next;
+                        } while (flinebeg && flinebeg->f_size == 0);
+                    }
+                    break;
+                case ',': case ';':
+                    continue;
+                }
+            }
+            lex_newlines = FALSE;
+        }
+    }
+  badform:
+    bufptr = str_get(linestr);
+    yyerror("Format not terminated");
+    return froot.f_next;
+}