diff options
author | Kriton Kyrimis <kyrimis@princeton.edu> | 1988-02-01 04:35:21 +0000 |
---|---|---|
committer | Larry Wall <lwall@jpl-devvax.jpl.nasa.gov> | 1988-02-01 04:35:21 +0000 |
commit | 9bb9d9f726fa55c70ed76abad9fe7c61d4eb4182 (patch) | |
tree | b31ea68d7f4dc2e2d30b150698df92735e4c251a | |
parent | 83b4785aebef542ad391e53b49c107fc5e1b4a58 (diff) | |
download | perl-9bb9d9f726fa55c70ed76abad9fe7c61d4eb4182.tar.gz |
perl 1.0 patch 12: scripts made by a2p don't handle leading white space right on input
Awk ignores leading whitespace on split. Perl by default does not.
The a2p translator couldn't handle this. The fix is partly to a2p
and partly to perl. Perl now has a way to specify to split to
ignore leading white space as awk does. A2p now takes advantage of
that.
I also threw in an optimization that lets runtime patterns
compile just once if they are known to be constant, so that
split(' ') doesn't compile the pattern every time.
-rw-r--r-- | arg.c | 14 | ||||
-rw-r--r-- | patchlevel.h | 2 | ||||
-rw-r--r-- | perl.man.2 | 26 | ||||
-rw-r--r-- | x2p/a2p.h | 7 | ||||
-rw-r--r-- | x2p/walk.c | 13 |
5 files changed, 49 insertions, 13 deletions
@@ -1,6 +1,9 @@ -/* $Header: arg.c,v 1.0.1.5 88/01/30 08:53:16 root Exp $ +/* $Header: arg.c,v 1.0.1.6 88/02/01 17:32:26 root Exp $ * * $Log: arg.c,v $ + * Revision 1.0.1.6 88/02/01 17:32:26 root + * patch12: made split(' ') behave like awk in ignoring leading white space. + * * Revision 1.0.1.5 88/01/30 08:53:16 root * patch9: fixed some missing right parens introduced (?) by patch 2 * @@ -220,6 +223,15 @@ STR ***retary; char *d; m = str_get(eval(spat->spat_runtime,Null(STR***))); + if (!*m || (*m == ' ' && !m[1])) { + m = "[ \\t\\n]+"; + while (isspace(*s)) s++; + } + if (spat->spat_runtime->arg_type == O_ITEM && + spat->spat_runtime[1].arg_type == A_SINGLE) { + arg_free(spat->spat_runtime); /* it won't change, so */ + spat->spat_runtime = Nullarg; /* no point compiling again */ + } if (d = compile(&spat->spat_compex,m,TRUE,FALSE)) { #ifdef DEBUGGING deb("/%s/: %s\n", m, d); diff --git a/patchlevel.h b/patchlevel.h index 98702f8e84..bc5f1c8250 100644 --- a/patchlevel.h +++ b/patchlevel.h @@ -1 +1 @@ -#define PATCHLEVEL 11 +#define PATCHLEVEL 12 diff --git a/perl.man.2 b/perl.man.2 index 25f3c26469..05eb4a9130 100644 --- a/perl.man.2 +++ b/perl.man.2 @@ -1,7 +1,10 @@ ''' Beginning of part 2 -''' $Header: perl.man.2,v 1.0.1.2 88/01/30 17:04:28 root Exp $ +''' $Header: perl.man.2,v 1.0.1.3 88/02/01 17:33:03 root Exp $ ''' ''' $Log: perl.man.2,v $ +''' Revision 1.0.1.3 88/02/01 17:33:03 root +''' patch12: documented split more adequately. +''' ''' Revision 1.0.1.2 88/01/30 17:04:28 root ''' patch 11: random cleanup ''' @@ -333,8 +336,25 @@ Anything matching PATTERN is taken to be a delimiter separating the fields. (Note that the delimiter may be longer than one character.) Trailing null fields are stripped, which potential users of pop() would do well to remember. -A pattern matching the null string will split the value of EXPR into separate -characters. 
+A pattern matching the null string (not to be confused with a null pattern) +will split the value of EXPR into separate characters at each point it +matches that way. +For example: +.nf + + print join(':',split(/ */,'hi there')); + +.fi +produces the output 'h:i:t:h:e:r:e'. + +The pattern /PATTERN/ may be replaced with an expression to specify patterns +that vary at runtime. +As a special case, specifying a space ('\ ') will split on white space +just as split with no arguments does, but leading white space does NOT +produce a null first field. +Thus, split('\ ') can be used to emulate awk's default behavior, whereas +split(/\ /) will give you as many null initial fields as there are +leading spaces. .sp Example: .nf @@ -1,6 +1,9 @@ -/* $Header: a2p.h,v 1.0.1.1 88/01/26 09:52:30 root Exp $ +/* $Header: a2p.h,v 1.0.1.2 88/02/01 17:33:40 root Exp $ * * $Log: a2p.h,v $ + * Revision 1.0.1.2 88/02/01 17:33:40 root + * patch12: forgot to fix #define YYDEBUG; bug in a2p. + * * Revision 1.0.1.1 88/01/26 09:52:30 root * patch 5: a2p didn't use config.h. * @@ -226,7 +229,7 @@ EXT int expectterm INIT(TRUE); #ifdef DEBUGGING EXT int debug INIT(0); EXT int dlevel INIT(0); -#define YYDEBUG; +#define YYDEBUG 1 extern int yydebug; #endif diff --git a/x2p/walk.c b/x2p/walk.c index e745510b1d..e81a9fd8de 100644 --- a/x2p/walk.c +++ b/x2p/walk.c @@ -1,6 +1,9 @@ -/* $Header: walk.c,v 1.0.1.1 88/01/28 11:07:56 root Exp $ +/* $Header: walk.c,v 1.0.1.2 88/02/01 17:34:05 root Exp $ * * $Log: walk.c,v $ + * Revision 1.0.1.2 88/02/01 17:34:05 root + * patch12: made a2p take advantage of new awk-compatible split in perl. + * * Revision 1.0.1.1 88/01/28 11:07:56 root * patch8: changed some misleading comments. 
* @@ -71,7 +74,7 @@ int *numericptr; str_cat(str,"';\t\t# field separator from -F switch\n"); } else if (saw_FS && !const_FS) { - str_cat(str,"$FS = '[ \\t\\n]+';\t\t# set field separator\n"); + str_cat(str,"$FS = ' ';\t\t# set field separator\n"); } if (saw_OFS) { str_cat(str,"$, = ' ';\t\t# set output field separator\n"); @@ -361,8 +364,6 @@ sub Pick {\n\ str_scat(str,fstr=walk(1,level,ops[node+3].ival,&numarg)); str_free(fstr); numeric |= numarg; - if (strEQ(str->str_ptr,"$FS = '\240'")) - str_set(str,"$FS = '[\240\\n\\t]+'"); break; case OADD: str = walk(1,level,ops[node+1].ival,&numarg); @@ -511,7 +512,7 @@ sub Pick {\n\ else if (saw_FS) str_cat(str,"$FS"); else - str_cat(str,"/[ \\t\\n]+/"); + str_cat(str,"' '"); str_cat(str,", "); str_scat(str,fstr=walk(1,level,ops[node+1].ival,&numarg)); str_free(fstr); @@ -1095,7 +1096,7 @@ int level; else if (saw_FS) str_cat(str," = split($FS);\n"); else - str_cat(str," = split;\n"); + str_cat(str," = split(' ');\n"); tab(str,level); } |