summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLarry Wall <lwall@netlabs.com>1992-06-08 04:52:17 +0000
committerLarry Wall <lwall@netlabs.com>1992-06-08 04:52:17 +0000
commit2f3197b3c480b4120c210442c74a59d064d932c9 (patch)
tree30417bb150bf52940d2261a681fe50d527e89784
parent7c0587c85ff56c1fa1d95bc5228a7aff2da43d6c (diff)
downloadperl-2f3197b3c480b4120c210442c74a59d064d932c9.tar.gz
perl 4.0 patch 33: patch #20, continued
See patch #20.
-rw-r--r--atarist/wildmat.c507
-rw-r--r--patchlevel.h2
-rw-r--r--toke.c394
3 files changed, 785 insertions, 118 deletions
diff --git a/atarist/wildmat.c b/atarist/wildmat.c
new file mode 100644
index 0000000000..ec152d4dfb
--- /dev/null
+++ b/atarist/wildmat.c
@@ -0,0 +1,507 @@
+/* $Revision: 4.0.1.1 $
+**
+** Do shell-style pattern matching for ?, \, [], and * characters.
+** Might not be robust in face of malformed patterns; e.g., "foo[a-"
+** could cause a segmentation violation. It is 8bit clean.
+**
+** Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
+** Rich $alz is now <rsalz@bbn.com>.
+** April, 1991: Replaced mutually-recursive calls with in-line code
+** for the star character.
+**
+** Special thanks to Lars Mathiesen <thorinn@diku.dk> for the ABORT code.
+** This can greatly speed up failing wildcard patterns. For example:
+** pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-*
+** text 1: -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1
+** text 2: -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1
+** Text 1 matches with 51 calls, while text 2 fails with 54 calls. Without
+** the ABORT, then it takes 22310 calls to fail. Ugh. The following
+** explanation is from Lars:
+** The precondition that must be fulfilled is that DoMatch will consume
+** at least one character in text. This is true if *p is neither '*' nor
+** '\0'.) The last return has ABORT instead of FALSE to avoid quadratic
+** behaviour in cases like pattern "*a*b*c*d" with text "abcxxxxx". With
+** FALSE, each star-loop has to run to the end of the text; with ABORT
+** only the last one does.
+**
+** Once the control of one instance of DoMatch enters the star-loop, that
+** instance will return either TRUE or ABORT, and any calling instance
+** will therefore return immediately after (without calling recursively
+** again). In effect, only one star-loop is ever active. It would be
+** possible to modify the code to maintain this context explicitly,
+** eliminating all recursive calls at the cost of some complication and
+** loss of clarity (and the ABORT stuff seems to be unclear enough by
+** itself). I think it would be unwise to try to get this into a
+** released version unless you have a good test data base to try it out
+** on.
+*/
+
+#define TRUE 1
+#define FALSE 0
+#define ABORT -1
+
+
+ /* What character marks an inverted character class? */
+#define NEGATE_CLASS '^'
+ /* Is "*" a common pattern? */
+#define OPTIMIZE_JUST_STAR
+ /* Do tar(1) matching rules, which ignore a trailing slash? */
+#undef MATCH_TAR_PATTERN
+
+
+/*
+** Match text and p, return TRUE, FALSE, or ABORT.
+*/
+static int
+DoMatch(text, p)
+ char *text;
+ char *p;
+{
+ int last;
+ int matched;
+ int reverse;
+
+ for ( ; *p; text++, p++) {
+ if (*text == '\0' && *p != '*')
+ return ABORT;
+ switch (*p) {
+ case '\\':
+ /* Literal match with following character. */
+ p++;
+ /* FALLTHROUGH */
+ default:
+ if (*text != *p)
+ return FALSE;
+ continue;
+ case '?':
+ /* Match anything. */
+ continue;
+ case '*':
+ while (*++p == '*')
+ /* Consecutive stars act just like one. */
+ continue;
+ if (*p == '\0')
+ /* Trailing star matches everything. */
+ return TRUE;
+ while (*text)
+ if ((matched = DoMatch(text++, p)) != FALSE)
+ return matched;
+ return ABORT;
+ case '[':
+ reverse = p[1] == NEGATE_CLASS ? TRUE : FALSE;
+ if (reverse)
+ /* Inverted character class. */
+ p++;
+ for (last = 0400, matched = FALSE; *++p && *p != ']'; last = *p)
+ /* This next line requires a good C compiler. */
+ if (*p == '-' ? *text <= *++p && *text >= last : *text == *p)
+ matched = TRUE;
+ if (matched == reverse)
+ return FALSE;
+ continue;
+ }
+ }
+
+#ifdef MATCH_TAR_PATTERN
+ if (*text == '/')
+ return TRUE;
+#endif /* MATCH_TAR_ATTERN */
+ return *text == '\0';
+}
+
+
+/*
+** User-level routine. Returns TRUE or FALSE.
+*/
+int
+wildmat(text, p)
+ char *text;
+ char *p;
+{
+#ifdef OPTIMIZE_JUST_STAR
+ if (p[0] == '*' && p[1] == '\0')
+ return TRUE;
+#endif /* OPTIMIZE_JUST_STAR */
+ return DoMatch(text, p) == TRUE;
+}
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#if __STDC__
+#ifdef unix
+#define _SIZE_T /* unix defines size_t in sys/types.h */
+#endif
+#ifndef _COMPILER_H
+# include <compiler.h>
+#endif
+#include <stddef.h>
+#include <stdlib.h>
+#else
+extern char *malloc(), *realloc();
+extern char *rindex(), *strdup();
+#define __PROTO(x) ()
+#endif
+#include <string.h>
+
+#define MAX_DIR 32 /* max depth of dir recursion */
+static struct {
+ char *dir, *patt;
+} dir_stack[MAX_DIR];
+static int stack_p;
+static char **matches;
+static int nmatches;
+
+static void *ck_memalloc __PROTO((void *));
+#define ck_strdup(p) ck_memalloc(strdup(p))
+#define ck_malloc(s) ck_memalloc(malloc(s))
+#define ck_realloc(p, s) ck_memalloc(realloc(p, s))
+
+
+#define DEBUGX(x)
+
+/*
+ * return true if patt contains a wildcard char
+ */
+int contains_wild(patt)
+char *patt;
+{
+ char c;
+ char *p;
+
+ /* only check for wilds in the basename part of the pathname only */
+ if((p = rindex(patt, '/')) == NULL)
+ p = rindex(patt, '\\');
+ if(!p)
+ p = patt;
+
+ while((c = *p++))
+ if((c == '*') || (c == '?') || (c == '['))
+ return 1;
+ return 0;
+}
+
+#ifndef ZOO
+void free_all()
+{
+ char **p;
+
+ if(!matches)
+ return;
+
+ for(p = matches; *p; p++)
+ free(*p);
+ free(matches);
+ matches = NULL;
+}
+#endif
+
+static void push(dir, patt)
+char *dir;
+char *patt;
+{
+ if(stack_p < (MAX_DIR - 2))
+ stack_p++;
+ else
+ {
+ fprintf(stderr,"directory stack overflow\n");
+ exit(99);
+ }
+ dir_stack[stack_p].dir = dir;
+ dir_stack[stack_p].patt = patt;
+}
+
+/*
+ * glob patt
+ * if decend_dir is true, recursively decend any directories encountered.
+ * returns pointer to all matches encountered.
+ * if the initial patt is a directory, and decend_dir is true, it is
+ * equivalent to specifying the pattern "patt\*"
+ *
+ * Restrictions:
+ * - handles wildcards only in the base part of a pathname
+ * ie: will not handle \foo\*\bar\ (wildcard in the middle of pathname)
+ *
+ * - max dir recursion is MAX_DIR
+ *
+ * - on certain failures it will just skip potential matches as if they
+ * were not present.
+ *
+ * ++jrb bammi@cadence.com
+ */
+static char **do_match __PROTO((int decend_dir));
+
+char **glob(patt, decend_dir)
+char *patt;
+int decend_dir;
+{
+ char *dir, *basepatt, *p;
+ struct stat s;
+
+ DEBUGX((fprintf(stderr,"glob(%s, %d)\n", patt, decend_dir)));
+ matches = NULL;
+ nmatches = 0;
+ stack_p = -1;
+
+ /* first check for wildcards */
+ if(contains_wild(patt))
+ {
+ /* break it up into dir and base patt, do_matches and return */
+ p = ck_strdup(patt);
+ if((basepatt = rindex(p, '/')) == NULL)
+ basepatt = rindex(p, '\\');
+ if(basepatt)
+ {
+ dir = p;
+ *basepatt++ = '\0';
+ basepatt = ck_strdup(basepatt);
+ }
+ else
+ {
+ dir = ck_strdup(".");
+ basepatt = p;
+ }
+
+ if(strcmp(basepatt, "*.*") == 0)
+ {
+ /* the desktop, and other braindead shells strike again */
+ basepatt[1] = '\0';
+ }
+ push(dir, basepatt);
+ DEBUGX((fprintf(stderr, "calling %s, %s\n", dir, basepatt)));
+ return do_match(decend_dir);
+ }
+
+ /* if no wilds, check for dir */
+ if(decend_dir && (!stat(patt, &s)))
+ {
+ if((s.st_mode & S_IFMT) == S_IFDIR)
+ { /* is a dir */
+ size_t len = strlen(patt);
+
+ dir = ck_strdup(patt);
+ --len;
+ if(len && ((dir[len] == '/')
+#ifdef atarist
+ || (dir[len] == '\\')
+#endif
+ ))
+ dir[len] = '\0';
+ basepatt = ck_strdup("*");
+ push(dir, basepatt);
+ DEBUGX((fprintf(stderr, "calling %s, %s\n", dir, basepatt)));
+ return do_match(decend_dir);
+ }
+ }
+ return NULL;
+}
+
+static char **do_match(decend_dir)
+int decend_dir;
+{
+ DIR *dirp;
+ struct dirent *d;
+ struct stat s;
+ char *dir, *basepatt;
+
+ while(stack_p >= 0)
+ {
+ dir = ck_strdup(dir_stack[stack_p].dir);
+ free(dir_stack[stack_p].dir);
+ basepatt = ck_strdup(dir_stack[stack_p].patt);
+ free(dir_stack[stack_p--].patt);
+
+ DEBUGX((fprintf(stderr,"dir %s patt %s stack %d\n", dir, basepatt, stack_p)));
+
+ dirp = opendir(dir);
+ if(!dirp)
+ {
+ free(dir);
+ DEBUGX((fprintf(stderr,"no dir\n")));
+ continue;
+ }
+
+ while((d = readdir(dirp)))
+ {
+ char *p = ck_malloc(strlen(dir) + strlen(d->d_name) + 2L);
+ if(strcmp(dir, "."))
+ /* If we have a full pathname then */
+ { /* let's append the directory info */
+ strcpy(p, dir);
+#ifndef unix
+ strcat(p, "\\");
+#else
+ strcat(p, "/");
+#endif
+ strcat(p, d->d_name);
+ }
+ else /* Otherwise, the name is just fine, */
+ strcpy(p, d->d_name); /* there's no need for './' -- bjsjr */
+
+ DEBUGX((fprintf(stderr, "Testing %s\n", p)));
+ if(!stat(p, &s)) /* if stat fails, ignore it */
+ {
+ if( ((s.st_mode & S_IFMT) == S_IFREG) ||
+ ((s.st_mode & S_IFMT) == S_IFLNK) )
+ { /* it is a file/symbolic link */
+ if(wildmat(d->d_name, basepatt))
+ { /* it matches pattern */
+ DEBUGX((fprintf(stderr,"File Matched\n")));
+ if(matches == NULL)
+ matches = (char **)ck_malloc(sizeof(char *));
+ else
+ matches = (char **)
+ ck_realloc(matches, (nmatches+1)*sizeof(char *));
+ matches[nmatches++] = p;
+ } /* no match */
+ else
+ {
+ DEBUGX((fprintf(stderr,"No File Match\n")));
+ free(p);
+ }
+ } else if(decend_dir && ((s.st_mode & S_IFMT) == S_IFDIR))
+ {
+ if(!((!strcmp(d->d_name,".")) || (!strcmp(d->d_name, "..")
+#ifdef atarist
+ || (!strcmp(d->d_name, ".dir"))
+#endif
+ )))
+ {
+ char *push_p = ck_strdup("*");
+ push(p, push_p);
+ DEBUGX((fprintf(stderr,"Dir pushed\n")));
+ }
+ else
+ {
+ DEBUGX((fprintf(stderr, "DIR skipped\n")));
+ free(p);
+ }
+ }
+ else
+ {
+ DEBUGX((fprintf(stderr, "Not a dir/no decend\n")));
+ free(p);
+ }
+ } /* stat */
+ else
+ {
+ DEBUGX((fprintf(stderr, "Stat failed\n")));
+ free(p);
+ }
+ } /* while readdir */
+ closedir(dirp);
+ free(basepatt);
+ free(dir);
+ DEBUGX((fprintf(stderr, "Dir done\n\n")));
+ } /* while dirs in stack */
+
+ if(!nmatches)
+ {
+ DEBUGX((fprintf(stderr, "No matches\n")));
+ return NULL;
+ }
+
+ matches = (char **)realloc(matches, (nmatches+1)*sizeof(char *));
+ if(!matches)
+ { return NULL; }
+ matches[nmatches] = NULL;
+ DEBUGX((fprintf(stderr, "%d matches\n", nmatches)));
+ return matches;
+}
+
+#ifdef ZOO
+#include "errors.i"
+#endif
+
+static void *ck_memalloc(p)
+void *p;
+{
+ if(!p)
+ {
+#ifndef ZOO
+ fprintf(stderr, "Out of memory\n");
+ exit(98);
+#else
+ prterror('f', no_memory);
+#endif
+ }
+ return p;
+}
+
+#ifdef TEST_GLOB
+void test(path, dec)
+char *path;
+int dec;
+{
+ char **m;
+ char **matches;
+
+ printf("Testing %s %d\n", path, dec);
+ matches = glob(path, dec);
+ if(!matches)
+ {
+ printf("No matches\n");
+ }
+ else
+ {
+ for(m = matches; *m; m++)
+ printf("%s\n", *m);
+ putchar('\n');
+ free_all();
+ }
+}
+
+int main()
+{
+#ifndef unix
+ test("e:\\lib\\*.olb", 0);
+ test("e:\\lib", 0);
+ test("e:\\lib\\", 1);
+#else
+ test("/net/acae127/home/bammi/News/comp.sources.misc/*.c", 0);
+ test("/net/acae127/home/bammi/News/comp.sources.misc", 0);
+ test("/net/acae127/home/bammi/News/comp.sources.misc", 1);
+ test("/net/acae127/home/bammi/atari/cross-gcc", 1);
+#endif
+
+ return 0;
+}
+
+#endif
+
+#ifdef TEST_WILDMAT
+#include <stdio.h>
+
+/* Yes, we use gets not fgets. Sue me. */
+extern char *gets();
+
+
+main()
+{
+ char pattern[80];
+ char text[80];
+
+ printf("Wildmat tester. Enter pattern, then strings to test.\n");
+ printf("A blank line gets prompts for a new pattern; a blank pattern\n");
+ printf("exits the program.\n\n");
+
+ for ( ; ; ) {
+ printf("Enter pattern: ");
+ if (gets(pattern) == NULL)
+ break;
+ for ( ; ; ) {
+ printf("Enter text: ");
+ if (gets(text) == NULL)
+ exit(0);
+ if (text[0] == '\0')
+ /* Blank line; go back and get a new pattern. */
+ break;
+ printf(" %s\n", wildmat(text, pattern) ? "YES" : "NO");
+ }
+ }
+
+ exit(0);
+ /* NOTREACHED */
+}
+#endif /* TEST_WILDMAT */
diff --git a/patchlevel.h b/patchlevel.h
index 1d54f19971..1d5b76f174 100644
--- a/patchlevel.h
+++ b/patchlevel.h
@@ -1 +1 @@
-#define PATCHLEVEL 32
+#define PATCHLEVEL 33
diff --git a/toke.c b/toke.c
index 4858c2c246..b3afce61d4 100644
--- a/toke.c
+++ b/toke.c
@@ -1,4 +1,4 @@
-/* $RCSfile: toke.c,v $$Revision: 4.0.1.5 $$Date: 91/11/11 16:45:51 $
+/* $RCSfile: toke.c,v $$Revision: 4.0.1.6 $$Date: 92/06/08 16:03:49 $
*
* Copyright (c) 1991, Larry Wall
*
@@ -6,6 +6,19 @@
* License or the Artistic License, as specified in the README file.
*
* $Log: toke.c,v $
+ * Revision 4.0.1.6 92/06/08 16:03:49 lwall
+ * patch20: an EXPR may now start with a bareword
+ * patch20: print $fh EXPR can now expect term rather than operator in EXPR
+ * patch20: added ... as variant on ..
+ * patch20: new warning on spurious backslash
+ * patch20: new warning on missing $ for foreach variable
+ * patch20: "foo"x1024 now legal without space after x
+ * patch20: new warning on print accidentally used as function
+ * patch20: tr/stuff// wasn't working right
+ * patch20: 2. now eats the dot
+ * patch20: <@ARGV> now notices @ARGV
+ * patch20: tr/// now lets you say \-
+ *
* Revision 4.0.1.5 91/11/11 16:45:51 lwall
* patch19: default arg for shift was wrong after first subroutine definition
*
@@ -39,6 +52,8 @@
#include "perl.h"
#include "perly.h"
+static void set_csh();
+
#ifdef I_FCNTL
#include <fcntl.h>
#endif
@@ -63,7 +78,11 @@ void checkcomma();
#endif
#define CLINE (cmdline = (curcmd->c_line < cmdline ? curcmd->c_line : cmdline))
+#ifdef atarist
+#define PERL_META(c) ((c) | 128)
+#else
#define META(c) ((c) | 128)
+#endif
#define RETURN(retval) return (bufptr = s,(int)retval)
#define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
@@ -93,18 +112,29 @@ void checkcomma();
#define FOP22(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP22)
#define FOP25(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP25)
+static char *last_uni;
+
/* This bit of chicanery makes a unary function followed by
* a parenthesis into a function with one argument, highest precedence.
*/
-#define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s, \
+#define UNI(f) return(yylval.ival = f, \
+ expectterm = TRUE, \
+ bufptr = s, \
+ last_uni = oldbufptr, \
(*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
/* This does similarly for list operators, merely by pretending that the
* paren came before the listop rather than after.
*/
+#ifdef atarist
+#define LOP(f) return(CLINE, *s == '(' || (s = skipspace(s), *s == '(') ? \
+ (*s = (char) PERL_META('('), bufptr = oldbufptr, '(') : \
+ (yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
+#else
#define LOP(f) return(CLINE, *s == '(' || (s = skipspace(s), *s == '(') ? \
(*s = (char) META('('), bufptr = oldbufptr, '(') : \
(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
+#endif
/* grandfather return to old style */
#define OLDLOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP)
@@ -117,6 +147,22 @@ register char *s;
return s;
}
+void
+check_uni() {
+ char *s;
+ char ch;
+
+ if (oldoldbufptr != last_uni)
+ return;
+ while (isSPACE(*last_uni))
+ last_uni++;
+ for (s = last_uni; isALNUM(*s); s++) ;
+ ch = *s;
+ *s = '\0';
+ warn("Warning: Use of \"%s\" without parens is ambiguous", last_uni);
+ *s = ch;
+}
+
#ifdef CRIPPLED_CC
#undef UNI
@@ -132,6 +178,7 @@ char *s;
yylval.ival = f;
expectterm = TRUE;
bufptr = s;
+ last_uni = oldbufptr;
if (*s == '(')
return FUNC1;
s = skipspace(s);
@@ -150,7 +197,11 @@ char *s;
if (*s != '(')
s = skipspace(s);
if (*s == '(') {
+#ifdef atarist
+ *s = PERL_META('(');
+#else
*s = META('(');
+#endif
bufptr = oldbufptr;
return '(';
}
@@ -164,6 +215,7 @@ char *s;
#endif /* CRIPPLED_CC */
+int
yylex()
{
register char *s = bufptr;
@@ -190,6 +242,10 @@ yylex()
*s++ = '(';
oldbufptr = s;
}
+ else if ((*s & 127) == '}') {
+ *s++ = '}';
+ RETURN('}');
+ }
else
warn("Unrecognized character \\%03o ignored", *s++ & 255);
goto retry;
@@ -201,6 +257,10 @@ yylex()
*s++ = '(';
oldbufptr = s;
}
+ else if ((*s & 127) == '}') {
+ *s++ = '}';
+ RETURN('}');
+ }
else
warn("Unrecognized character \\%03o ignored", *s++ & 255);
goto retry;
@@ -212,6 +272,7 @@ yylex()
RETURN(0);
if (s++ < bufend)
goto retry; /* ignore stray nulls */
+ last_uni = 0;
if (firstline) {
firstline = FALSE;
if (minus_n || minus_p || perldb) {
@@ -413,8 +474,11 @@ yylex()
s++;
RETURN(DEC);
}
- if (expectterm)
+ if (expectterm) {
+ if (isSPACE(*s) || !isSPACE(*bufptr))
+ check_uni();
OPERATOR('-');
+ }
else
AOP(O_SUBTRACT);
case '+':
@@ -423,13 +487,17 @@ yylex()
s++;
RETURN(INC);
}
- if (expectterm)
+ if (expectterm) {
+ if (isSPACE(*s) || !isSPACE(*bufptr))
+ check_uni();
OPERATOR('+');
+ }
else
AOP(O_ADD);
case '*':
if (expectterm) {
+ check_uni();
s = scanident(s,bufend,tokenbuf);
yylval.stabval = stabent(tokenbuf,TRUE);
TERM(STAR);
@@ -442,6 +510,8 @@ yylex()
MOP(O_MULTIPLY);
case '%':
if (expectterm) {
+ if (!isALPHA(s[1]))
+ check_uni();
s = scanident(s,bufend,tokenbuf);
yylval.stabval = hadd(stabent(tokenbuf,TRUE));
TERM(HSH);
@@ -473,8 +543,8 @@ yylex()
tmp = *s++;
TERM(tmp);
case '}':
- tmp = *s++;
- RETURN(tmp);
+ *s |= 128;
+ RETURN(';');
case '&':
s++;
tmp = *s++;
@@ -487,6 +557,8 @@ yylex()
s++;
if (isALPHA(*s) || *s == '_' || *s == '\'')
*(--s) = '\\'; /* force next ident to WORD */
+ else
+ check_uni();
OPERATOR(AMPER);
}
OPERATOR('&');
@@ -517,7 +589,9 @@ yylex()
OPERATOR('!');
case '<':
if (expectterm) {
- s = scanstr(s);
+ if (s[1] != '<' && !index(s,'>'))
+ check_uni();
+ s = scanstr(s, SCAN_DEF);
TERM(RSTRING);
}
s++;
@@ -570,7 +644,21 @@ yylex()
goto retry;
}
yylval.stabval = stabent(tokenbuf,TRUE);
- TERM(REG);
+ expectterm = FALSE;
+ if (isSPACE(*s) && oldoldbufptr && oldoldbufptr < bufptr) {
+ s++;
+ while (isSPACE(*oldoldbufptr))
+ oldoldbufptr++;
+ if (*oldoldbufptr == 'p' && strnEQ(oldoldbufptr,"print",5)) {
+ if (index("&*<%", *s) && isALPHA(s[1]))
+ expectterm = TRUE; /* e.g. print $fh &sub */
+ else if (*s == '.' && isDIGIT(s[1]))
+ expectterm = TRUE; /* e.g. print $fh .3 */
+ else if (index("/?-+", *s) && !isSPACE(s[1]))
+ expectterm = TRUE; /* e.g. print $fh -1 */
+ }
+ }
+ RETURN(REG);
case '@':
d = s;
@@ -583,6 +671,7 @@ yylex()
case '/': /* may either be division or pattern */
case '?': /* may either be conditional or pattern */
if (expectterm) {
+ check_uni();
s = scanpat(s);
TERM(PATTERN);
}
@@ -596,19 +685,31 @@ yylex()
tmp = *s++;
if (*s == tmp) {
s++;
+ if (*s == tmp) {
+ s++;
+ yylval.ival = 0;
+ }
+ else
+ yylval.ival = AF_COMMON;
OPERATOR(DOTDOT);
}
+ if (expectterm)
+ check_uni();
AOP(O_CONCAT);
}
/* FALL THROUGH */
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
case '\'': case '"': case '`':
- s = scanstr(s);
+ s = scanstr(s, SCAN_DEF);
TERM(RSTRING);
case '\\': /* some magic to force next word to be a WORD */
s++; /* used by do and sub to force a separate namespace */
+ if (!isALPHA(*s) && *s != '_' && *s != '\'') {
+ warn("Spurious backslash ignored");
+ goto retry;
+ }
/* FALL THROUGH */
case '_':
SNARFWORD;
@@ -627,14 +728,14 @@ yylex()
TERM(RSTRING);
}
else if (strEQ(d,"__END__")) {
-#ifndef TAINT
STAB *stab;
int fd;
/*SUPPRESS 560*/
- if (stab = stabent("DATA",FALSE)) {
+ if (!in_eval && (stab = stabent("DATA",FALSE))) {
stab->str_pok |= SP_MULTI;
- stab_io(stab) = stio_new();
+ if (!stab_io(stab))
+ stab_io(stab) = stio_new();
stab_io(stab)->ifp = rsfp;
#if defined(HAS_FCNTL) && defined(F_SETFD)
fd = fileno(rsfp);
@@ -648,7 +749,6 @@ yylex()
stab_io(stab)->type = '<';
rsfp = Nullfp;
}
-#endif
goto fake_eof;
}
}
@@ -773,6 +873,10 @@ yylex()
SNARFWORD;
if (strEQ(d,"for") || strEQ(d,"foreach")) {
yylval.ival = curcmd->c_line;
+ while (s < bufend && isSPACE(*s))
+ s++;
+ if (isALPHA(*s))
+ fatal("Missing $ on loop variable");
OPERATOR(FOR);
}
if (strEQ(d,"format")) {
@@ -981,11 +1085,11 @@ yylex()
case 'p': case 'P':
SNARFWORD;
if (strEQ(d,"print")) {
- checkcomma(s,"filehandle");
+ checkcomma(s,d,"filehandle");
LOP(O_PRINT);
}
if (strEQ(d,"printf")) {
- checkcomma(s,"filehandle");
+ checkcomma(s,d,"filehandle");
LOP(O_PRTF);
}
if (strEQ(d,"push")) {
@@ -999,20 +1103,20 @@ yylex()
if (strEQ(d,"package"))
OPERATOR(PACKAGE);
if (strEQ(d,"pipe"))
- FOP22(O_PIPE);
+ FOP22(O_PIPE_OP);
break;
case 'q': case 'Q':
SNARFWORD;
if (strEQ(d,"q")) {
- s = scanstr(s-1);
+ s = scanstr(s-1, SCAN_DEF);
TERM(RSTRING);
}
if (strEQ(d,"qq")) {
- s = scanstr(s-2);
+ s = scanstr(s-2, SCAN_DEF);
TERM(RSTRING);
}
if (strEQ(d,"qx")) {
- s = scanstr(s-2);
+ s = scanstr(s-2, SCAN_DEF);
TERM(RSTRING);
}
break;
@@ -1145,7 +1249,7 @@ yylex()
if (strEQ(d,"socketpair"))
FOP25(O_SOCKPAIR);
if (strEQ(d,"sort")) {
- checkcomma(s,"subroutine name");
+ checkcomma(s,d,"subroutine name");
d = bufend;
while (s < d && isSPACE(*s)) s++;
if (*s == ';' || *s == ')') /* probably a close */
@@ -1154,6 +1258,7 @@ yylex()
/*SUPPRESS 530*/
for (d = s; isALNUM(*d); d++) ;
strncpy(tokenbuf,s,d-s);
+ tokenbuf[d-s] = '\0';
if (strNE(tokenbuf,"keys") &&
strNE(tokenbuf,"values") &&
strNE(tokenbuf,"split") &&
@@ -1324,9 +1429,16 @@ yylex()
FOP(O_WRITE);
break;
case 'x': case 'X':
- SNARFWORD;
- if (!expectterm && strEQ(d,"x"))
+ if (*s == 'x' && isDIGIT(s[1]) && !expectterm) {
+ s++;
MOP(O_REPEAT);
+ }
+ SNARFWORD;
+ if (strEQ(d,"x")) {
+ if (!expectterm)
+ MOP(O_REPEAT);
+ check_uni();
+ }
break;
case 'y': case 'Y':
if (s[1] == '\'') {
@@ -1346,6 +1458,15 @@ yylex()
break;
}
yylval.cval = savestr(d);
+ if (expectterm == 2) { /* special case: start of statement */
+ while (isSPACE(*s)) s++;
+ if (*s == ':') {
+ s++;
+ CLINE;
+ OPERATOR(LABEL);
+ }
+ TERM(WORD);
+ }
expectterm = FALSE;
if (oldoldbufptr && oldoldbufptr < bufptr) {
while (isSPACE(*oldoldbufptr))
@@ -1359,30 +1480,40 @@ yylex()
}
void
-checkcomma(s,what)
+checkcomma(s,name,what)
register char *s;
+char *name;
char *what;
{
- char *someword;
-
+ char *w;
+
+ if (dowarn && *s == ' ' && s[1] == '(') {
+ w = index(s,')');
+ if (w)
+ for (w++; *w && isSPACE(*w); w++) ;
+ if (!w || !*w || !index(";|}", *w)) /* an advisory hack only... */
+ warn("%s (...) interpreted as function",name);
+ }
+ while (s < bufend && isSPACE(*s))
+ s++;
if (*s == '(')
s++;
while (s < bufend && isSPACE(*s))
s++;
if (isALPHA(*s) || *s == '_') {
- someword = s++;
+ w = s++;
while (isALNUM(*s))
s++;
while (s < bufend && isSPACE(*s))
s++;
if (*s == ',') {
*s = '\0';
- someword = instr(
+ w = instr(
"tell eof times getlogin wait length shift umask getppid \
cos exp int log rand sin sqrt ord wantarray",
- someword);
+ w);
*s = ',';
- if (someword)
+ if (w)
return;
fatal("No comma allowed after %s", what);
}
@@ -1492,7 +1623,7 @@ int len;
e = d;
break;
}
- (void)bcopy(d+1,d,e-d);
+ Move(d+1,d,e-d,char);
e--;
switch(*d) {
case 'n':
@@ -1626,11 +1757,7 @@ register char *s;
}
}
if (spat->spat_flags & SPAT_FOLD)
-#ifdef STRUCTCOPY
- savespat = *spat;
-#else
- (void)bcopy((char *)spat, (char *)&savespat, sizeof(SPAT));
-#endif
+ StructCopy(spat, &savespat, SPAT);
scanconst(spat,str->str_ptr,len);
if ((spat->spat_flags & SPAT_ALL) && (spat->spat_flags & SPAT_SCANFIRST)) {
fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
@@ -1642,11 +1769,7 @@ register char *s;
}
else {
if (spat->spat_flags & SPAT_FOLD)
-#ifdef STRUCTCOPY
- *spat = savespat;
-#else
- (void)bcopy((char *)&savespat, (char *)spat, sizeof(SPAT));
-#endif
+ StructCopy(&savespat, spat, SPAT);
if (spat->spat_short)
fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
@@ -1660,20 +1783,25 @@ register char *s;
}
char *
-scansubst(s)
-register char *s;
+scansubst(start)
+char *start;
{
+ register char *s = start;
register SPAT *spat;
register char *d;
register char *e;
int len;
STR *str = Str_new(93,0);
+ char term = *s;
+
+ if (term && (d = index("([{< )]}> )]}>",term)))
+ term = d[5];
Newz(802,spat,1,SPAT);
spat->spat_next = curstash->tbl_spatroot; /* link into spat list */
curstash->tbl_spatroot = spat;
- s = str_append_till(str,s+1,bufend,*s,patleave);
+ s = str_append_till(str,s+1,bufend,term,patleave);
if (s >= bufend) {
str_free(str);
yyerror("Substitution pattern not terminated");
@@ -1712,7 +1840,9 @@ register char *s;
}
scanconst(spat,str->str_ptr,len);
get_repl:
- s = scanstr(s);
+ if (term != *start)
+ s++;
+ s = scanstr(s, SCAN_REPL);
if (s >= bufend) {
str_free(str);
yyerror("Substitution replacement not terminated");
@@ -1736,12 +1866,17 @@ get_repl:
}
}
while (*s == 'g' || *s == 'i' || *s == 'e' || *s == 'o') {
+ int es = 0;
+
if (*s == 'e') {
s++;
+ es++;
if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE)
spat->spat_repl[1].arg_type = A_SINGLE;
spat->spat_repl = make_op(
- (spat->spat_repl[1].arg_type == A_SINGLE ? O_EVALONCE : O_EVAL),
+ (!es && spat->spat_repl[1].arg_type == A_SINGLE
+ ? O_EVALONCE
+ : O_EVAL),
2,
spat->spat_repl,
Nullarg,
@@ -1818,36 +1953,14 @@ register SPAT *spat;
}
char *
-expand_charset(s,len,retlen)
-register char *s;
-int len;
-int *retlen;
-{
- char t[520];
- register char *d = t;
- register int i;
- register char *send = s + len;
-
- while (s < send && d - t <= 256) {
- if (s[1] == '-' && s+2 < send) {
- for (i = (s[0] & 0377); i <= (s[2] & 0377); i++)
- *d++ = i;
- s += 3;
- }
- else
- *d++ = *s++;
- }
- *d = '\0';
- *retlen = d - t;
- return nsavestr(t,d-t);
-}
-
-char *
-scantrans(s)
-register char *s;
+scantrans(start)
+char *start;
{
+ register char *s = start;
ARG *arg =
l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg));
+ STR *tstr;
+ STR *rstr;
register char *t;
register char *r;
register short *tbl;
@@ -1861,21 +1974,34 @@ register char *s;
New(803,tbl,256,short);
arg[2].arg_type = A_NULL;
arg[2].arg_ptr.arg_cval = (char*) tbl;
- s = scanstr(s);
+
+ s = scanstr(s, SCAN_TR);
if (s >= bufend) {
yyerror("Translation pattern not terminated");
yylval.arg = Nullarg;
return s;
}
- t = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
- yylval.arg[1].arg_ptr.arg_str->str_cur,&tlen);
+ tstr = yylval.arg[1].arg_ptr.arg_str;
+ yylval.arg[1].arg_ptr.arg_str = Nullstr;
arg_free(yylval.arg);
- s = scanstr(s-1);
+ t = tstr->str_ptr;
+ tlen = tstr->str_cur;
+
+ if (s[-1] == *start)
+ s--;
+
+ s = scanstr(s, SCAN_TR|SCAN_REPL);
if (s >= bufend) {
yyerror("Translation replacement not terminated");
yylval.arg = Nullarg;
return s;
}
+ rstr = yylval.arg[1].arg_ptr.arg_str;
+ yylval.arg[1].arg_ptr.arg_str = Nullstr;
+ arg_free(yylval.arg);
+ r = rstr->str_ptr;
+ rlen = rstr->str_cur;
+
complement = delete = squash = 0;
while (*s == 'c' || *s == 'd' || *s == 's') {
if (*s == 'c')
@@ -1886,15 +2012,8 @@ register char *s;
squash = 1;
s++;
}
- r = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
- yylval.arg[1].arg_ptr.arg_str->str_cur,&rlen);
- arg_free(yylval.arg);
arg[2].arg_len = delete|squash;
yylval.arg = arg;
- if (!rlen && !delete) {
- Safefree(r);
- r = t; rlen = tlen;
- }
if (complement) {
Zero(tbl, 256, short);
for (i = 0; i < tlen; i++)
@@ -1904,15 +2023,20 @@ register char *s;
if (j >= rlen) {
if (delete)
tbl[i] = -2;
+ else if (rlen)
+ tbl[i] = r[j-1] & 0377;
else
- tbl[i] = r[j-1];
+ tbl[i] = i;
}
else
- tbl[i] = r[j++];
+ tbl[i] = r[j++] & 0377;
}
}
}
else {
+ if (!rlen && !delete) {
+ r = t; rlen = tlen;
+ }
for (i = 0; i < 256; i++)
tbl[i] = -1;
for (i = 0, j = 0; i < tlen; i++,j++) {
@@ -1928,16 +2052,17 @@ register char *s;
tbl[t[i] & 0377] = r[j] & 0377;
}
}
- if (r != t)
- Safefree(r);
- Safefree(t);
+ str_free(tstr);
+ str_free(rstr);
return s;
}
char *
-scanstr(s)
-register char *s;
+scanstr(start, in_what)
+char *start;
+int in_what;
{
+ register char *s = start;
register char term;
register char *d;
register ARG *arg;
@@ -1948,7 +2073,10 @@ register char *s;
bool hereis = FALSE;
STR *herewas;
STR *str;
- char *leave = "\\$@nrtfbeacx0123456789[{]}lLuUE"; /* which backslash sequences to keep */
+ /* which backslash sequences to keep */
+ char *leave = (in_what & SCAN_TR)
+ ? "\\$@nrtfbeacx0123456789-"
+ : "\\$@nrtfbeacx0123456789[{]}lLuUE";
int len;
arg = op_new(1);
@@ -2025,7 +2153,7 @@ register char *s;
else
*d++ = *s++;
}
- if (*s == '.' && s[1] && index("0123456789eE ;",s[1])) {
+ if (*s == '.' && s[1] != '.') {
*d++ = *s++;
while (isDIGIT(*s) || *s == '_') {
if (*s == '_')
@@ -2052,6 +2180,8 @@ register char *s;
arg[1].arg_ptr.arg_str = str;
break;
case '<':
+ if (in_what & (SCAN_REPL|SCAN_TR))
+ goto do_double;
if (*++s == '<') {
hereis = TRUE;
d = tokenbuf;
@@ -2091,18 +2221,19 @@ register char *s;
s = cpytill(d,s,bufend,'>',&len);
if (s < bufend)
s++;
+ else
+ fatal("Unterminated <> operator");
+
if (*d == '$') d++;
while (*d && (isALNUM(*d) || *d == '\''))
d++;
if (d - tokenbuf != len) {
- d = tokenbuf;
+ s = start;
+ term = *s;
arg[1].arg_type = A_GLOB;
- d = nsavestr(d,len);
- arg[1].arg_ptr.arg_stab = stab = genstab();
- stab_io(stab) = stio_new();
- stab_val(stab) = str_make(d,len);
- Safefree(d);
set_csh();
+ alwaysdollar = TRUE; /* treat $) and $| as variables */
+ goto snarf_it;
}
else {
d = tokenbuf;
@@ -2160,6 +2291,7 @@ register char *s;
snarf_it:
{
STR *tmpstr;
+ STR *tmpstr2 = Nullstr;
char *tmps;
CLINE;
@@ -2235,7 +2367,7 @@ register char *s;
tmpstr->str_len = tmpstr->str_cur + 1;
Renew(tmpstr->str_ptr, tmpstr->str_len, char);
}
- if ((arg[1].arg_type & A_MASK) == A_SINGLE) {
+ if (arg[1].arg_type == A_SINGLE) {
arg[1].arg_ptr.arg_str = tmpstr;
break;
}
@@ -2259,22 +2391,41 @@ register char *s;
}
s = d = tmpstr->str_ptr; /* assuming shrinkage only */
while (s < send) {
- if ((*s == '$' && s+1 < send &&
- (alwaysdollar || /*(*/ (s[1] != ')' && s[1] != '|')) ) ||
- (*s == '@' && s+1 < send) ) {
- if (s[1] == '#' && (isALPHA(s[2]) || s[2] == '_'))
- *d++ = *s++;
- len = scanident(s,send,tokenbuf) - s;
- if (*s == '$' || strEQ(tokenbuf,"ARGV")
- || strEQ(tokenbuf,"ENV")
- || strEQ(tokenbuf,"SIG")
- || strEQ(tokenbuf,"INC") )
- (void)stabent(tokenbuf,TRUE); /* make sure it exists */
- while (len--)
- *d++ = *s++;
- continue;
+ if (in_what & SCAN_TR) {
+ if (*s != '\\' && s[1] == '-' && s+2 < send) {
+ int i;
+ if (!tmpstr2) { /* oops, have to grow */
+ tmpstr2 = str_smake(tmpstr);
+ s = tmpstr2->str_ptr + (s - tmpstr->str_ptr);
+ send = tmpstr2->str_ptr + (send - tmpstr->str_ptr);
+ }
+ i = d - tmpstr->str_ptr;
+ STR_GROW(tmpstr, tmpstr->str_len + 256);
+ d = tmpstr->str_ptr + i;
+ for (i = (s[0] & 0377); i <= (s[2] & 0377); i++)
+ *d++ = i;
+ s += 3;
+ continue;
+ }
}
- else if (*s == '\\' && s+1 < send) {
+ else {
+ if ((*s == '$' && s+1 < send &&
+ (alwaysdollar || /*(*/(s[1] != ')' && s[1] != '|')) ) ||
+ (*s == '@' && s+1 < send) ) {
+ if (s[1] == '#' && (isALPHA(s[2]) || s[2] == '_'))
+ *d++ = *s++;
+ len = scanident(s,send,tokenbuf) - s;
+ if (*s == '$' || strEQ(tokenbuf,"ARGV")
+ || strEQ(tokenbuf,"ENV")
+ || strEQ(tokenbuf,"SIG")
+ || strEQ(tokenbuf,"INC") )
+ (void)stabent(tokenbuf,TRUE); /* add symbol */
+ while (len--)
+ *d++ = *s++;
+ continue;
+ }
+ }
+ if (*s == '\\' && s+1 < send) {
s++;
switch (*s) {
default:
@@ -2327,12 +2478,20 @@ register char *s;
}
*d = '\0';
- if ((arg[1].arg_type & A_MASK) == A_DOUBLE && makesingle)
- arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
+ if (arg[1].arg_type == A_DOUBLE && makesingle)
+ arg[1].arg_type = A_SINGLE; /* now we can optimize on it */
tmpstr->str_cur = d - tmpstr->str_ptr;
- arg[1].arg_ptr.arg_str = tmpstr;
+ if (arg[1].arg_type == A_GLOB) {
+ arg[1].arg_ptr.arg_stab = stab = genstab();
+ stab_io(stab) = stio_new();
+ str_sset(stab_val(stab), tmpstr);
+ }
+ else
+ arg[1].arg_ptr.arg_str = tmpstr;
s = tmps;
+ if (tmpstr2)
+ str_free(tmpstr2);
break;
}
}
@@ -2564,6 +2723,7 @@ load_format()
return froot.f_next;
}
+static void
set_csh()
{
#ifdef CSH