Major rewrite of s2p. And I mean really major, it is

an implementation of sed in perl. If called as s2p it will function as s2p. Note: needs non-UNIXifying. Subject: s2p Message-ID: <39AA5578.2102E2AA@alcatel.at> p4raw-id: //depot/perl@7040
author: Wolfgang Laun <Wolfgang.Laun@alcatel.at> 2000-08-28 16:05:12 +0200
committer: Jarkko Hietaniemi <jhi@iki.fi> 2000-09-08 14:34:18 +0000
commit: 86a59229552fc742ed09795227799fb41537f06f (patch)
tree: 996057e7273840293fd520f949ed9563c37677e4 /x2p
parent: 33b454808819084359e76a3f223a41b842c180b7 (diff)
download: perl-86a59229552fc742ed09795227799fb41537f06f.tar.gz
1 files changed, 1794 insertions, 690 deletions
diff --git a/x2p/s2p.PL b/x2p/s2p.PL
index 4f7bf8c724..21a5ee8927 100644
--- a/x2p/s2p.PL
+++ b/x2p/s2p.PL
@@ -1,4 +1,4 @@
-#!/usr/local/bin/perl
+#!/usr/bin/perl
 
 use Config;
 use File::Basename qw(&basename &dirname);
@@ -29,823 +29,1927 @@ print OUT <<"!GROK!THIS!";
 $Config{startperl}
     eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}'
 	if \$running_under_some_shell;
-\$startperl = "$Config{startperl}";
-\$perlpath = "$Config{perlpath}";
+my \$startperl = "$Config{startperl}";
+my \$perlpath  = "$Config{perlpath}";
 !GROK!THIS!
 
 # In the following, perl variables are not expanded during extraction.
 
 print OUT <<'!NO!SUBS!';
 
-# $RCSfile: s2p.SH,v $$Revision: 4.1 $$Date: 92/08/07 18:29:23 $
-#
-# $Log:	s2p.SH,v $
+$0 =~ s/^.*?(\w+)$/$1/;
+
+# (p)sed - a stream editor
+# History:  Aug 12 2000: Original version.
+
+use strict;
+use integer;
+use Symbol;
 
 =head1 NAME
 
-s2p - Sed to Perl translator
+sed - a stream editor
 
 =head1 SYNOPSIS
 
-B<s2p [options] filename>
+   sed [-an] script [file ...]
+   sed [-an] [-e script] [-f script-file] [file ...]
 
 =head1 DESCRIPTION
 
-I<s2p> takes a sed script specified on the command line (or from
-standard input) and produces a comparable I<perl> script on the
-standard output.
+A stream editor reads the input stream consisting of the specified files
+(or standard input, if none are given), processes is line by line by
+applying a script consisting of edit commands, and writes resulting lines
+to standard output. The filename `C<->' may be used to read standard input.
+
+The edit script is composed from arguments of B<-e> options and
+script-files, in the given order. A single script argument may be specified
+as the first parameter.
+
+If this program is invoked with the name F<s2p>, it will act as a
+sed-to-Perl translator. See L<"sed Script Translation">.
+
+B<sed> returns an exit code of 0 on success or >0 if an error occurred.
+
+=head1 OPTIONS
+
+=over 4
+
+=item B<-a>
 
-=head2 Options
+A file specified as argument to the B<w> edit command is by default
+opened before input processing starts. Using B<-a>, opening of such
+files is delayed until the first line is actually written to the file.
 
-Options include:
+=item B<-e> I<script>
 
-=over 5
+The editing commands defined by I<script> are appended to the script.
+Multiple commands must be separated by newlines.
 
-=item B<-DE<lt>numberE<gt>>
+=item B<-f> I<script-file>
 
-sets debugging flags.
+Editing commands from the specified I<script-file> are read and appended
+to the script.
 
 =item B<-n>
 
-specifies that this sed script was always invoked with a B<sed -n>.
-Otherwise a switch parser is prepended to the front of the script.
+By default, a line is written to standard output after the editing script
+has been applied to it. The B<-n> option suppresses automatic printing.
+
+=back
+
+=head1 COMMANDS
+
+B<sed> command syntax is defined as
+
+Z<> Z<> Z<> Z<>[I<address>[B<,>I<address>]][B<!>]I<function>[I<argument>]
+
+with whitespace being permitted before or after addresses, and between
+the function character and the argument. The I<address>es and the
+address inverter (C<!>) are used to restrict the application of a
+command to the selected line(s) of input.
+
+Each command must be on a line of its own, except where noted in
+the synopses below.
+
+The edit cycle performed on each input line consist of reading the line
+(without its trailing newline character) into the I<pattern space>,
+applying the applicable commands of the edit script, writing the final
+contents of the pattern space and a newline to the standard output.
+A I<hold space> is provided for saving the contents of the
+pattern space for later use.
 
-=item B<-p>
+=head2 Addresses
 
-specifies that this sed script was never invoked with a B<sed -n>.
-Otherwise a switch parser is prepended to the front of the script.
+A sed address is either a line number or a pattern, which may be combined
+arbitrarily to construct ranges. Lines are numbered across all input files.
+
+Any address may be followed by an exclamation mark (`C<!>'), selecting
+all lines not matching that address.
+
+=over 4
+
+=item I<number>
+
+The line with the given number is selected.
+
+=item B<$>
+
+A dollar sign (C<$>) is the line number of the last line of the input stream.
+
+=item B</>I<regular expression>B</>
+
+A pattern address is a basic regular expression (see 
+L<"Basic Regular Expressions">), between the delimiting character C</>.
+Any other character except C<\> or newline may be used to delimit a
+pattern address when the initial delimiter is prefixed with a
+backslash (`C<\>').
 
 =back
 
-=head2 Considerations
+If no address is given, the command selects every line.
 
-The perl script produced looks very sed-ish, and there may very well
-be better ways to express what you want to do in perl.  For instance,
-s2p does not make any use of the split operator, but you might want
-to.
+If one address is given, it selects the line (or lines) matching the
+address.
 
-The perl script you end up with may be either faster or slower than
-the original sed script.  If you're only interested in speed you'll
-just have to try it both ways.  Of course, if you want to do something
-sed doesn't do, you have no choice.  It's often possible to speed up
-the perl script by various methods, such as deleting all references to
-$\ and chop.
+Two addresses select a range that begins whenever the first address
+matches, and ends (including that line) when the second address matches.
+If the first (second) address is a matching pattern, the second 
+address is not applied to the very same line to determine the end of
+the range. Likewise, if the second address is a matching pattern, the
+first address is not applied to the very same line to determine the
+begin of another range. If both addresses are line numbers,
+and the second line number is less than the first line number, then
+only the first line is selected.
 
-=head1 ENVIRONMENT
 
-s2p uses no environment variables.
+=head2 Functions
 
-=head1 AUTHOR
+The maximum permitted number of addresses is indicated with each
+function synopsis below.
 
-Larry Wall E<lt>F<larry@wall.org>E<gt>
+The argument I<text> consists of one or more lines following the command.
+Embedded newlines in I<text> must be preceded with a backslash.  Other
+backslashes in I<text> are deleted and the following character is taken
+literally.
 
-=head1 FILES
+=over 4
 
-=head1 SEE ALSO
+=cut
 
- perl	The perl compiler/interpreter
+my %ComTab;
+#--------------------------------------------------------------------------
+$ComTab{'a'}=[ 1, 'txt', \&Emit,       '{ push( @Q, <<'."'TheEnd' ) }\n" ]; #ok
 
- a2p	awk to perl translator
+=item [1addr]B<a\> I<text>
 
-=head1 DIAGNOSTICS
+Write I<text> (which must start on the line following the command)
+to standard output immediately before reading the next line
+of input, either by executing the B<N> function or by beginning a new cycle.
 
-=head1 BUGS
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'b'}=[ 2, 'str', \&Branch,     '{ goto XXX; }'                   ]; #ok
+
+=item [2addr]B<b> [I<label>]
+
+Branch to the B<:> function with the specified I<label>. If no label
+is given, branch to the end of the script.
 
 =cut
 
-$indent = 4;
-$shiftwidth = 4;
-$l = '{'; $r = '}';
+#--------------------------------------------------------------------------
+$ComTab{'c'}=[ 2, 'txt', \&Change,     <<'-X-'                           ]; #ok
+{ print <<'TheEnd'; } $doPrint = 0; goto EOS;
+-X-
+### continue OK => next CYCLE;
 
-while ($ARGV[0] =~ /^-/) {
-    $_ = shift;
-  last if /^--/;
-    if (/^-D/) {
-	$debug++;
-	open(BODY,'>-');
-	next;
+=item [2addr]B<c\> I<text>
+
+The line, or range of lines, selected by the address is deleted. 
+The I<text> (which must start on the line following the command)
+is written to standard output. With an address range, this occurs at
+the end of the range.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'d'}=[ 2, '',    \&Emit,       <<'-X-'                           ]; #ok
+{ $doPrint = 0;
+  goto EOS;
+}
+-X-
+### continue OK => next CYCLE;
+
+=item [2addr]B<d>
+
+Deletes the pattern space and starts the next cycle.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'D'}=[ 2, '',    \&Emit,       <<'-X-'                           ]; #ok
+{ s/^.*\n?//;
+  if(length($_)){ goto BOS } else { goto EOS }
+}
+-X-
+### continue OK => next CYCLE;
+
+=item [2addr]B<D>
+
+Deletes the pattern space through the first embedded newline or to the end.
+If the pattern space becomes empty, a new cycle is started, otherwise
+execution of the script is restarted.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'g'}=[ 2, '',    \&Emit,       '{ $_ = $Hold };'                 ]; #ok
+
+=item [2addr]B<g>
+
+Replace the contents of the pattern space with the hold space.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'G'}=[ 2, '',    \&Emit,       '{ $_ .= "\n"; $_ .= $Hold };'    ]; #ok
+
+=item [2addr]B<G>
+
+Append a newline and the contents of the hold space to the pattern space.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'h'}=[ 2, '',    \&Emit,       '{ $Hold = $_ }'                  ]; #ok
+
+=item [2addr]B<h>
+
+Replace the contents of the hold space with the pattern space.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'H'}=[ 2, '',    \&Emit,       '{ $Hold .= "\n"; $Hold .= $_; }' ]; #ok
+
+=item [2addr]B<H>
+
+Append a newline and the contents of the pattern space to the hold space.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'i'}=[ 1, 'txt', \&Emit,       '{ print <<'."'TheEnd' }\n"       ]; #ok
+
+=item [1addr]B<i\> I<text>
+
+Write the I<text> (which must start on the line following the command)
+to standard output.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'l'}=[ 2, '',    \&Emit,       '{ _l() }'                        ]; #okUTF8
+
+=item [2addr]B<l>
+
+Print the contents of the pattern space: non-printable characters are
+shown in C-style escaped form; long lines are split and have a trailing
+`C<\>' at the point of the split; the true end of a line is marked with
+a `C<$>'. Escapes are: `\a', `\t', `\n', `\f', `\r', `\e' for
+BEL, HT, LF, FF, CR, ESC, respectively, and `\' followed by a three-digit
+octal number for all other non-printable characters.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'n'}=[ 2, '',    \&Emit,       <<'-X-'                           ]; #ok
+{ print $_, "\n" if $doPrint;
+  printQ if @Q;
+  $CondReg = 0;
+  last CYCLE unless getsARGV();
+  chomp();
+}
+-X-
+
+=item [2addr]B<n>
+
+If automatic printing is enabled, write the pattern space to the standard
+output. Replace the pattern space with the next line of input. If
+there is no more input, processing is terminated.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'N'}=[ 2, '',    \&Emit,       <<'-X-'                           ]; #ok
+{ printQ if @Q;
+  $CondReg = 0;
+  last CYCLE unless getsARGV( $h );
+  chomp( $h );
+  $_ .= "\n$h";
+}
+-X-
+
+=item [2addr]B<N>
+
+Append a newline and the next line of input to the pattern space. If
+there is no more input, processing is terminated.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'p'}=[ 2, '',    \&Emit,       '{ print $_, "\n"; }'             ]; #ok
+
+=item [2addr]B<p>
+
+Print the pattern space to the standard output. (Use the B<-n> option
+to suppress automatic printing at the end of a cycle if you want to
+avoid double printing of lines.)
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'P'}=[ 2, '',    \&Emit,       <<'-X-'                           ]; #ok
+{ if( /^(.*)/ ){ print $1, "\n"; } }
+-X-
+
+=item [2addr]B<P>
+
+Prints the pattern space through the first embedded newline or to the end.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'q'}=[ 1, '',    \&Emit,       <<'-X-'                           ]; #ok
+{ print $_, "\n" if $doPrint;
+  last CYCLE;
+}
+-X-
+
+=item [1addr]B<q>
+
+Branch to the end of the script and quit without starting a new cycle.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'r'}=[ 1, 'str', \&Emit,       "{ _r( '-X-' ) }"                 ]; #ok
+### FIXME: lazy reading - big files???
+
+=item [1addr]B<r> I<file>
+
+Copy the contents of the I<file> to standard output immediately before
+the next attempt to read a line of input. Any error encountered while
+reading I<file> is silently ignored.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'s'}=[ 2, 'sub', \&Emit,       ''                                ]; #ok
+
+=item [2addr]B<s/>I<regular expression>B</>I<replacement>B</>I<flags>
+
+Substitute the I<replacement> string for the first substring in
+the pattern space that matches the I<regular expression>.
+Any character other than backslash or newline can be used instead of a 
+slash to delimit the regular expression and the replacement.
+To use the delimiter as a literal character within the regular expression
+and the replacement, precede the character by a backslash (`C<\>').
+
+Literal newlines may be embedded in the replacement string by
+preceding a newline with a backslash.
+
+Within the replacement, an ampersand (`C<&>') is replaced by the string
+matching the regular expression. The strings `C<\1>' through `C<\9>' are
+replaced by the corresponding subpattern (see L<"Basic Regular Expressions">).
+To get a literal `C<&>' or `C<\>' in the replacement text, precede it
+by a backslash.
+
+The following I<flags> modify the behaviour of the B<s> command:
+
+=over 8
+
+=item B<g>
+
+The replacement is performed for all matching, non-overlapping substrings
+of the pattern space.
+
+=item B<1>..B<9>
+
+Replace only the n-th matching substring of the pattern space.
+
+=item B<p>
+
+If the substitution was made, print the new value of the pattern space.
+
+=item B<w> I<file>
+
+If the substitution was made, write the new value of the pattern space
+to the specified file.
+
+=back
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'t'}=[ 2, 'str', \&Branch,     '{ goto XXX if _t() }'            ]; #ok
+
+=item [2addr]B<t> [I<label>]
+
+Branch to the B<:> function with the specified I<label> if any B<s>
+substitutions have been made since the most recent reading of an input line
+or execution of a B<t> function. If no label is given, branch to the end of
+the script. 
+
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'w'}=[ 2, 'str', \&Write,      "{ _w( '-X-' ) }"                 ]; #ok
+
+=item [2addr]B<w> I<file>
+
+The contents of the pattern space are written to the I<file>.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'x'}=[ 2, '',    \&Emit,       '{ ($Hold, $_) = ($_, $Hold) }'   ]; #ok
+
+=item [2addr]B<x>
+
+Swap the contents of the pattern space and the hold space.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'y'}=[ 2, 'tra', \&Emit,       ''                                ]; #ok
+=item [2addr]B<y>B</>I<string1>B</>I<string2>B</>
+
+In the pattern space, replace all characters occuring in I<string1> by the
+character at the corresponding position in I<string2>. It is possible
+to use any character (other than a backslash or newline) instead of a
+slash to delimit the strings.  Within I<string1> and I<string2>, a
+backslash followed by any character other than a newline is that literal
+character, and a backslash followed by an `n' is replaced by a newline
+character.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'='}=[ 1, '',    \&Emit,       '{ print "$.\n" }'                ]; #ok
+
+=item [1addr]B<=>
+
+Prints the current line number on the standard output.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{':'}=[ 0, 'str', \&Label,      ''                                ]; #ok
+ 
+=item [0addr]B<:> [I<label>]
+
+The command specifies the position of the I<label>. It has no other effect.
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'{'}=[ 2, '',    \&BeginBlock, '{'                               ]; #ok
+$ComTab{'}'}=[ 0, '',    \&EndBlock,   ';}'                              ]; #ok
+# ';' to avoid warning on empty {}-block
+
+=item [2addr]B<{> [I<command>]
+
+=item [0addr]B<}>
+
+These two commands begin and end a command list. The first command may
+be given on the same line as the opening B<{> command. The commands
+within the list are jointly selected by the address(es) given on the
+B<{> command (but may still have individual addresses).
+
+=cut
+
+#--------------------------------------------------------------------------
+$ComTab{'#'}=[ 0, 'str', \&Comment,    ''                                ]; #ok
+
+=item [0addr]B<#> [I<comment>]
+
+The entire line is ignored (treated as a comment). If, however, the first
+two characters in the script are `C<#n>', automatic printing of output is
+suppressed, as if the B<-n> option were given on the command line.
+
+=back
+
+=cut
+
+use vars qw{ $isEOF $Hold %wFiles @Q $CondReg $doPrint };
+
+my $useDEBUG    = exists( $ENV{PSEDDEBUG} );
+my $useEXTBRE   = $ENV{PSEDEXTBRE} || '';
+$useEXTBRE =~ s/[^<>wWyB]//g; # gawk RE's handle these
+
+my $doAutoPrint = 1;          # automatic printing of pattern space (-n => 0)
+my $doOpenWrite = 1;          # open w command output files at start (-a => 0)
+my $svOpenWrite = 0;          # save $doOpenWrite
+my $doGenerate  = $0 eq 's2p';
+
+# Collected and compiled script
+#
+my( @Commands, %Defined, @BlockStack, %Label, $labNum, $Code );
+
+##################
+#  Compile Time
+#
+# Labels
+# 
+# Error handling
+#
+sub Warn($;$){
+    my( $msg, $loc ) = @_;
+    $loc ||= '';
+    $loc .= ': ' if length( $loc );
+    warn( "$0: $loc$msg\n" );
+}
+
+$labNum = 0;
+sub newLabel(){
+    return 'L_'.++$labNum;
+}
+
+# safeHere: create safe here delimiter and  modify opcode and argument
+#
+sub safeHere($$){
+    my( $codref, $argref ) = @_;
+    my $eod = 'EOD000';
+    while( $$argref =~ /^$eod$/m ){
+        $eod++;
     }
-    if (/^-n/) {
-	$assumen++;
-	next;
+    $$codref =~ s/TheEnd/$eod/e;
+    $$argref .= "$eod\n"; 
+}
+
+# Emit: create address logic and emit command
+#
+sub Emit($$$$$$){
+    my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
+    my $cond = '';
+    if( defined( $addr1 ) ){
+        if( defined( $addr2 ) ){
+	    $addr1 .= $addr2 =~ /^\d+$/ ? "..$addr2" : "...$addr2";
+        } else {
+	    $addr1 .= ' == $.' if $addr1 =~ /^\d+$/;
+	}
+	$cond = $negated ? "unless( $addr1 )\n" : "if( $addr1 )\n";
     }
-    if (/^-p/) {
-	$assumep++;
-	next;
+
+    if( $opcode eq '' ){
+	$Code .= "$cond$arg\n";
+
+    } elsif( $opcode =~ s/-X-/$arg/e ){
+	$Code .= "$cond$opcode\n";
+
+    } elsif( $opcode =~ /TheEnd/ ){
+	safeHere( \$opcode, \$arg );
+	$Code .= "$cond$opcode$arg";
+
+    } else {
+	$Code .= "$cond$opcode\n";
     }
-    die "I don't recognize this switch: $_\n";
-}
-
-unless ($debug) {
-    open(BODY,"+>/tmp/sperl$$") ||
-      &Die("Can't open temp file: $!\n");
-}
-
-if (!$assumen && !$assumep) {
-    print BODY &q(<<'EOT');
-:	while ($ARGV[0] =~ /^-/) {
-:	    $_ = shift;
-:	  last if /^--/;
-:	    if (/^-n/) {
-:		$nflag++;
-:		next;
-:	    }
-:	    die "I don't recognize this switch: $_\\n";
-:	}
-:	
-EOT
-}
-
-print BODY &q(<<'EOT');
-:	#ifdef PRINTIT
-:	#ifdef ASSUMEP
-:	$printit++;
-:	#else
-:	$printit++ unless $nflag;
-:	#endif
-:	#endif
-:	<><>
-:	$\ = "\n";		# automatically add newline on print
-:	<><>
-:	#ifdef TOPLABEL
-:	LINE:
-:	while (chop($_ = <>)) {
-:	#else
-:	LINE:
-:	while (<>) {
-:	    chop;
-:	#endif
-EOT
-
-LINE:
-while (<>) {
-
-    # Wipe out surrounding whitespace.
-
-    s/[ \t]*(.*)\n$/$1/;
-
-    # Perhaps it's a label/comment.
-
-    if (/^:/) {
-	s/^:[ \t]*//;
-	$label = &make_label($_);
-	if ($. == 1) {
-	    $toplabel = $label;
-	    if (/^(top|(re)?start|redo|begin(ning)|again|input)$/i) {
-		$_ = <>;
-		redo LINE; # Never referenced, so delete it if not a comment.
+    0;
+}
+
+# Write (w command, w flag): store pathname
+#
+sub Write($$$$$$){
+    my( $addr1, $addr2, $negated, $opcode, $path, $fl ) = @_;
+    $wFiles{$path} = '';
+    Emit( $addr1, $addr2, $negated, $opcode, $path, $fl );
+}
+
+
+# Label (: command): label definition
+#
+sub Label($$$$$$){
+    my( $addr1, $addr2, $negated, $opcode, $lab, $fl ) = @_;
+    my $rc = 0;
+    $lab =~ s/\s+//;
+    if( length( $lab ) ){
+	my $h;
+	if( ! exists( $Label{$lab} ) ){
+	    $h = $Label{$lab}{name} = newLabel();
+        } else {
+	    $h = $Label{$lab}{name};
+	    if( exists( $Label{$lab}{defined} ) ){
+		my $dl = $Label{$lab}{defined};
+		Warn( "duplicate label $lab (first defined at $dl)", $fl );
+		$rc = 1;
 	    }
 	}
-	$_ = "$label:";
-	if ($lastlinewaslabel++) {
-	    $indent += 4;
-	    print BODY &tab, ";\n";
-	    $indent -= 4;
-	}
-	if ($indent >= 2) {
-	    $indent -= 2;
-	    $indmod = 2;
-	}
-	next;
-    } else {
-	$lastlinewaslabel = '';
+        $Label{$lab}{defined} = $fl;
+	$Code .= "$h:;\n";
     }
+    $rc;
+}
 
-    # Look for one or two address clauses
+# BeginBlock ({ command): push block start
+#
+sub BeginBlock($$$$$$){
+    my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
+    push( @BlockStack, [ $fl, $addr1, $addr2, $negated ] );
+    Emit( $addr1, $addr2, $negated, $opcode, $arg, $fl );
+}
 
-    $addr1 = '';
-    $addr2 = '';
-    if (s/^([0-9]+)//) {
-	$addr1 = "$1";
-	$addr1 = "\$. == $addr1" unless /^,/;
-    }
-    elsif (s/^\$//) {
-	$addr1 = 'eof()';
-    }
-    elsif (s|^/||) {
-	$addr1 = &fetchpat('/');
+# EndBlock (} command): check proper nesting
+#
+sub EndBlock($$$$$$){
+    my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
+    my $rc;
+    my $jcom = pop( @BlockStack );
+    if( defined( $jcom ) ){
+	$rc = Emit( $addr1, $addr2, $negated, $opcode, $arg, $fl );
+    } else {
+	Warn( "unexpected `}'", $fl );
+	$rc = 1;
     }
-    if (s/^,//) {
-	if (s/^([0-9]+)//) {
-	    $addr2 = "$1";
-	} elsif (s/^\$//) {
-	    $addr2 = "eof()";
-	} elsif (s|^/||) {
-	    $addr2 = &fetchpat('/');
-	} else {
-	    &Die("Invalid second address at line $.\n");
+    $rc;
+}
+
+# Branch (t, b commands): check or create label, substitute default
+#
+sub Branch($$$$$$){
+    my( $addr1, $addr2, $negated, $opcode, $lab, $fl ) = @_;
+    $lab =~ s/\s+//; # no spaces at end
+    my $h;
+    if( length( $lab ) ){
+	if( ! exists( $Label{$lab} ) ){
+	    $h = $Label{$lab}{name} = newLabel();
+        } else {
+	    $h = $Label{$lab}{name};
 	}
-	if ($addr2 =~ /^\d+$/) {
-	    $addr1 .= "..$addr2";
+	push( @{$Label{$lab}{used}}, $fl );
+    } else {
+	$h = 'EOS';
+    }
+    $opcode =~ s/XXX/$h/e;
+    Emit( $addr1, $addr2, $negated, $opcode, '', $fl );
+}
+
+# Change (c command): is special due to range end watching
+#
+sub Change($$$$$$){
+    my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
+    my $kwd = $negated ? 'unless' : 'if';
+    if( defined( $addr2 ) ){
+        $addr1 .= $addr2 =~ /^\d+$/ ? "..$addr2" : "...$addr2";
+	if( ! $negated ){
+	    $addr1  = '$icnt = ('.$addr1.')';
+	    $opcode = 'if( $icnt =~ /E0$/ )' . $opcode;
 	}
-	else {
-	    $addr1 .= "...$addr2";
+    } else {
+	$addr1 .= ' == $.' if $addr1 =~ /^\d+$/;
+    }
+    safeHere( \$opcode, \$arg );
+    $Code .= "$kwd( $addr1 ){\n  $opcode$arg}\n";
+    0;
+}
+
+
+# Comment (# command): A no-op. Who would've thought that!
+#
+sub Comment($$$$$$){
+    my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
+### $Code .= "# $arg\n";
+    0;
+}
+
+
+sub stripRegex($$){
+    my( $del, $sref ) = @_;
+    my $regex = $del;
+    print "stripRegex:$del:$$sref:\n" if $useDEBUG;
+    while( $$sref =~ s{^(.*?)(\\*)\Q$del\E(\s*)}{}s ){
+        my $sl = $2;
+	$regex .= $1.$sl.$del;
+	if( length( $sl ) % 2 == 0 ){
+	    return $regex;
 	}
+	$regex .= $3;
     }
+    undef();
+}
 
-    # Now we check for metacommands {, }, and ! and worry
-    # about indentation.
+# stripTrans: take a <del> terminated string from y command
+#   honoring and cleaning up of \-escaped <del>'s
+#
+sub stripTrans($$){
+    my( $del, $sref ) = @_;
+    my $t = '';
+    print "stripTrans:$del:$$sref:\n" if $useDEBUG;
+    while( $$sref =~ s{^(.*?)(\\*)\Q$del\E}{}s ){
+        my $sl = $2;
+	$t .= $1;
+	if( length( $sl ) % 2 == 0 ){
+	    $t .= $sl;
+	    $t =~ s/\\\\/\\/g;
+	    return $t;
+	}
+	chop( $sl );
+	$t .= $sl.$del.$3;
+    }
+    undef();
+}
 
-    s/^[ \t]+//;
-    # a { to keep vi happy
-    if ($_ eq '}') {
-	$indent -= 4;
-	next;
+# makey - construct Perl y/// from sed y///
+#
+sub makey($$$){
+    my( $fr, $to, $fl ) = @_;
+    my $error = 0;
+
+    # Ensure that any '-' is up front.
+    # Diagnose duplicate contradicting mappings
+    my %tr;
+    for( my $i = 0; $i < length($fr); $i++ ){
+	my $fc = substr($fr,$i,1);
+	my $tc = substr($to,$i,1);
+	if( exists( $tr{$fc} ) && $tr{$fc} ne $tc ){
+	    Warn( "ambiguos translation for character `$fc' in `y' command",
+		  $fl );
+	    $error++;
+	}
+	$tr{$fc} = $tc;
     }
-    if (s/^!//) {
-	$if = 'unless';
-	$else = "$r else $l\n";
+    $fr = $to = '';
+    if( exists( $tr{'-'} ) ){
+	( $fr, $to ) = ( '-', $tr{'-'} );
+	delete( $tr{'-'} );
     } else {
-	$if = 'if';
-	$else = '';
-    }
-    if (s/^{//) {	# a } to keep vi happy
-	$indmod = 4;
-	$redo = $_;
-	$_ = '';
-	$rmaybe = '';
+	$fr = $to = '';
+    }
+    # might just as well sort it...
+    for my $fc ( sort keys( %tr ) ){
+	$fr .= $fc;
+	$to .= $tr{$fc};
+    }
+    # make embedded delimiters and newlines safe
+    $fr =~ s/([{}])/\$1/g;
+    $to =~ s/([{}])/\$1/g;
+    $fr =~ s/\n/\\n/g;
+    $to =~ s/\n/\\n/g;
+    return $error ? undef() : "{ y{$fr}{$to}; }";
+}
+
+######
+# makes - construct Perl s/// from sed s///
+#
+sub makes($$$$$$$){
+    my( $regex, $subst, $path, $global, $print, $nmatch, $fl ) = @_;
+
+    # make embedded newlines safe
+    $regex =~ s/\n/\\n/g;
+    $subst =~ s/\n/\\n/g;
+ 
+    my $code;
+    # n-th occurrence
+    #
+    if( length( $nmatch ) ){
+	$code = <<TheEnd;
+{ \$n = $nmatch;
+  while( --\$n && ( \$s = m ${regex}g ) ){}
+  \$s = ( substr( \$_, pos() ) =~ s ${regex}${subst}s ) if \$s;
+  \$CondReg ||= \$s;
+TheEnd
     } else {
-	$rmaybe = "\n$r";
-	if ($addr2 || $addr1) {
-	    $space = ' ' x $shiftwidth;
-	} else {
-	    $space = '';
-	}
-	$_ = &transmogrify();
+        $code = <<TheEnd;
+{ \$s = s ${regex}${subst}s${global};
+  \$CondReg ||= \$s;
+TheEnd
+    }
+    if( $print ){
+        $code .= '  print $_, "\n" if $s;'."\n";
     }
+    if( defined( $path ) ){
+        $wFiles{$path} = '';
+	$code .= " _w( '$path' ) if \$s;\n";
+    }
+    $code .= "}";
+}
 
-    # See if we can optimize to modifier form.
+=head1 BASIC REGULAR EXPRESSIONS
 
-    if ($addr1) {
-	if ($_ !~ /[\n{}]/ && $rmaybe && !$change &&
-	  $_ !~ / if / && $_ !~ / unless /) {
-	    s/;$/ $if $addr1;/;
-	    $_ = substr($_,$shiftwidth,1000);
-	} else {
-	    $_ = "$if ($addr1) $l\n$change$_$rmaybe";
-	}
-	$change = '';
-	next LINE;
-    }
-} continue {
-    @lines = split(/\n/,$_);
-    for (@lines) {
-	unless (s/^ *<<--//) {
-	    print BODY &tab;
+A I<Basic Regular Expression> (BRE), as defined in POSIX 1003.2, consists
+of I<atoms>, for matching parts of a string, and I<bounds>, specifying
+repetitions of a preceding atom.
+
+=head2 Atoms
+
+The possible atoms of a BRE are: B<.>, matching any single character;
+B<^> and B<$>, matching the null string at the beginning or end
+of a string, respectively; a I<bracket expressions>, enclosed
+in B<[> and B<]> (see below); and any single character with no
+other significance (matching that character). A B<\> before one
+of: B<.>, B<^>, B<$>, B<[>, B<*>, B<\>, matching the character
+after the backslash. A sequence of atoms enclosed in B<\(> and B<\)>
+becomes an atom and establishes the target for a I<backreference>,
+consisting of the substring that actually matches the enclosed atoms.
+Finally, B<\> followed by one of the digits B<0> through B<9> is a
+backreference.
+
+A B<^> that is not first, or a B<$> that is not last does not have
+a special significance and need not be preceded by a backslash to
+become literal. The same is true for a B<]>, that does not terminate
+a bracket expression.
+
+An unescaped backslash cannot be last in a BRE.
+
+=head2 Bounds
+
+The BRE bounds are: B<*>, specifying 0 or more matches of the preceding
+atom; B<\{>I<count>B<\}>, specifying that many repetitions;
+B<\{>I<minimum>B<,\}>, giving a lower limit; and
+B<\{>I<minimum>B<,>I<maximum>B<\}> finally defines a lower and upper
+bound. 
+
+A bound appearing as the first item in a BRE is taken literally.
+
+=head2 Bracket Expressions
+
+A I<bracket expression> is a list of characters, character ranges
+and character classes enclosed in B<[> and B<]> and matches any
+single character from the represented set of characters.
+
+A character range is written as two characters separated by B<-> and
+represents all characters (according to the character collating sequence)
+that are not less than the first and not greater than the second.
+(Ranges are very collating-sequence-dependent, and portable programs
+should avoid relying on them.)
+
+A character class is one of the class names
+
+   alnum     digit     punct
+   alpha     graph     space
+   blank     lower     upper
+   cntrl     print     xdigit
+
+enclosed in B<[:> and B<:]> and represents the set of characters
+as defined in ctype(3).
+
+If the first character after B<[> is B<^>, the sense of matching is
+inverted.
+
+To include a literal `C<^>', place it anywhere else but first. To
+include a literal 'C<]>' place it first or immediately after an
+initial B<^>. To include a literal `C<->' make it the first (or
+second after B<^>) or last character, or the second endpoint of
+a range.
+
+The special bracket expression constructs C<[[:E<lt>:]]> and C<[[:E<gt>:]]> 
+match the null string at the beginning and end of a word respectively.
+(Note that neither is identical to Perl's `\b' atom.)
+
+=head2 Additional Atoms
+
+Since some sed implementations provide additional regular expression
+atoms (not defined in POSIX 1003.2), B<psed> is capable of translating
+the following backslash escapes:
+
+=over 4
+
+=item B<\E<lt>> This is the same as C<[[:E<gt>:]]>.
+
+=item B<\E<gt>> This is the same as C<[[:E<lt>:]]>.
+
+=item B<\w> This is an abbreviation for C<[[:alnum:]_]>.
+
+=item B<\W> This is an abbreviation for C<[^[:alnum:]_]>.
+
+=item B<\y> Match the empty string at a word boundary.
+
+=item B<\B> Match the empty string between any two either word or non-word characters.
+
+=back
+
+To enable this feature, the environment variable PSEDEXTBRE must be set
+to a string containing the requested characters, e.g.:
+C<PSEDEXTBRE='E<lt>E<gt>wW'>.
+
+=cut
+
+#####
+# bre2p - convert BRE to Perl RE
+#
+sub peek(\$$){
+    my( $pref, $ic ) = @_;
+    $ic < length($$pref)-1 ? substr( $$pref, $ic+1, 1 ) : '';
+}
+
+sub bre2p($$$){
+    my( $del, $pat, $fl ) = @_;
+    my $led = $del;
+    $led =~ tr/{([</})]>/;
+    $led = '' if $led eq $del;
+
+    $pat = substr( $pat, 1, length($pat) - 2 );
+    my $res = '';
+    my $bracklev = 0;
+    my $backref  = 0;
+    my $parlev = 0;
+    for( my $ic = 0; $ic < length( $pat ); $ic++ ){
+        my $c = substr( $pat, $ic, 1 );
+        if( $c eq '\\' ){
+	    ### backslash escapes
+            my $nc = peek($pat,$ic);
+            if( $nc eq '' ){
+                Warn( "`\\' cannot be last in pattern", $fl );
+                return undef();
+            }
+	    $ic++;
+            if( $nc eq $del ){ ## \<pattern del> => \<pattern del>
+                $res .= "\\$del";
+
+	    } elsif( $nc =~ /([[.*\\n])/ ){
+		## check for \-escaped magics and \n:
+		## \[ \. \* \\ \n stay as they are
+                $res .= '\\'.$nc;
+
+            } elsif( $nc eq '(' ){ ## \( => (
+                $parlev++;
+                $res .= '(';
+
+            } elsif( $nc eq ')' ){ ## \) => )
+                $parlev--;
+		$backref++;
+                if( $parlev < 0 ){
+                    Warn( "unmatched `\\)'", $fl );
+                    return undef();
+                }
+                $res .= ')';
+
+            } elsif( $nc eq '{' ){ ## repetition factor \{<i>[,[<j>]]\}
+                my $endpos = index( $pat, '\\}', $ic );
+                if( $endpos < 0 ){
+                    Warn( "unmatched `\\{'", $fl );
+                    return undef();
+                }
+                my $rep = substr( $pat, $ic+1, $endpos-($ic+1) );
+                $ic = $endpos + 1;
+
+  	        if( $res =~ /^\^?$/ ){
+		    $res .= "\\{$rep\}";
+                } elsif( $rep =~ /^(\d+)(,?)(\d*)?$/ ){
+                    my $min = $1;
+                    my $com = $2 || '';
+                    my $max = $3;
+                    if( length( $max ) ){
+                        if( $max < $min ){
+                            Warn( "maximum less than minimum in `\\{$rep\\}'",
+				  $fl );
+                            return undef();
+                        }
+                    } else {
+                        $max = '';
+                    }
+		    # simplify some
+		    if( $min == 0 && $max eq '1' ){
+			$res .= '?';
+		    } elsif( $min == 1 && "$com$max" eq ',' ){
+			$res .= '+';
+		    } elsif( $min == 0 && "$com$max" eq ',' ){
+			$res .= '*';
+		    } else {
+			$res .= "{$min$com$max}";
+		    }
+                } else {
+                    Warn( "invalid repeat clause `\\{$rep\\}'", $fl );
+                    return undef();
+                }
+
+            } elsif( $nc =~ /^[1-9]$/ ){
+		## \1 .. \9 => \1 .. \9, but check for a following digit
+		if( $nc > $backref ){
+                    Warn( "invalid backreference ($nc)", $fl );
+                    return undef();
+		}
+                $res .= "\\$nc";
+		if( peek($pat,$ic) =~ /[0-9]/ ){
+		    $res .= '(?:)';
+		}
+
+            } elsif( $useEXTBRE && ( $nc =~ /[$useEXTBRE]/ ) ){
+		## extensions - at most <>wWyB - not in POSIX
+                if(      $nc eq '<' ){ ## \< => \b(?=\w), be precise
+                    $res .= '\\b(?<=\\W)';
+                } elsif( $nc eq '>' ){ ## \> => \b(?=\W), be precise
+                    $res .= '\\b(?=\\W)';
+                } elsif( $nc eq 'y' ){ ## \y => \b
+                    $res .= '\\b';
+                } else {               ## \B, \w, \W remain the same
+                    $res .= "\\$nc";
+                } 
+	    } elsif( $nc eq $led ){
+		## \<closing bracketing-delimiter> - keep '\'
+		$res .= "\\$nc";
+
+            } else { ## \ <char> => <char> ("as if `\' were not present")
+                $res .= $nc;
+            }
+
+        } elsif( $c eq '.' ){ ## . => .
+            $res .= $c;
+
+	} elsif( $c eq '*' ){ ## * => * but \* if there's nothing preceding it
+	    if( $res =~ /^\^?$/ ){
+                $res .= '\\*';
+            } elsif( substr( $res, -1, 1 ) ne '*' ){
+		$res .= $c;
+	    }
+
+        } elsif( $c eq '[' ){
+	    ## parse []: [^...] [^]...] [-...]
+	    my $add = '[';
+	    if( peek($pat,$ic) eq '^' ){
+		$ic++;
+		$add .= '^';
+	    }
+	    my $nc = peek($pat,$ic);
+  	    if( $nc eq ']' || $nc eq '-' ){
+		$add .= $nc;
+                $ic++;
+	    }
+	    # check that [ is not trailing
+	    if( $ic >= length( $pat ) - 1 ){
+		Warn( "unmatched `['", $fl );
+		return undef();
+	    }
+	    # look for [:...:] and x-y
+	    my $rstr = substr( $pat, $ic+1 );
+	    if( $rstr =~ /^((?:\[:\(\w+|[><]\):\]|[^]-](?:-[^]])?)*)/ ){
+ 	        my $cnt = $1;
+		$ic += length( $cnt );
+		$cnt =~ s/([\\\$])/\\$1/g; # `\', `$' are magic in Perl []
+		# try some simplifications
+ 	        my $red = $cnt;
+		if( $red =~ s/0-9// ){
+		    $cnt = $red.'\d';
+		    if( $red =~ s/A-Z// && $red =~ s/a-z// && $red =~ s/_// ){
+			$cnt = $red.'\w';
+                    }
+		}
+		$add .= $cnt;
+
+		# POSIX 1003.2 has this (optional) for begin/end word
+		$add = '\\b(?=\\W)'  if $add eq '[[:<:]]';
+		$add = '\\b(?<=\\W)' if $add eq '[[:>:]]';
+
+	    }
+
+	    ## may have a trailing `-' before `]'
+	    if( $ic < length($pat) - 1 &&
+                substr( $pat, $ic+1 ) =~ /^(-?])/ ){
+		$ic += length( $1 );
+		$add .= $1;
+		# another simplification
+		$add =~ s/^\[(\^?)(\\[dw])]$/ $1 eq '^' ? uc($2) : $2 /e;
+		$res .= $add;
+	    } else {
+		Warn( "unmatched `['", $fl );
+		return undef();
+	    }
+
+        } elsif( $c eq $led ){ ## unescaped <closing bracketing-delimiter>
+            $res .= "\\$c";
+
+        } elsif( $c eq ']' ){ ## unmatched ] is not magic
+            $res .= ']';
+
+        } elsif( $c =~ /[|+?{}()]/ ){ ## not magic in BRE, but in Perl: \-quote
+            $res .= "\\$c";
+
+        } elsif( $c eq '^' ){ ## not magic unless 1st, but in Perl: \-quote
+            $res .= length( $res ) ? '\\^' : '^';
+
+        } elsif( $c eq '$' ){ ## not magic unless last, but in Perl: \-quote
+            $res .= $ic == length( $pat ) - 1 ? '$' : '\\$';
+
+        } else {
+            $res .= $c;
+        }
+    }
+
+    if( $parlev ){
+       Warn( "unmatched `\\('", $fl );
+       return undef();
+    }
+
+    # final cleanup: eliminate raw HTs
+    $res =~ s/\t/\\t/g;
+    return $del . $res . ( $led ? $led : $del );
+}
+
+
+#####
+# sub2p - convert sed substitution to Perl substitution
+#
+sub sub2p($$$){
+    my( $del, $subst, $fl ) = @_;
+    my $led = $del;
+    $led =~ tr/{([</})]>/;
+    $led = '' if $led eq $del;
+
+    $subst = substr( $subst, 1, length($subst) - 2 );
+    my $res = '';
+ 
+    for( my $ic = 0; $ic < length( $subst ); $ic++ ){
+        my $c = substr( $subst, $ic, 1 );
+        if( $c eq '\\' ){
+	    ### backslash escapes
+            my $nc = peek($subst,$ic);
+            if( $nc eq '' ){
+                Warn( "`\\' cannot be last in substitution", $fl );
+                return undef();
+            }
+	    $ic++;
+	    if( $nc =~ /[\\$del$led]/ ){ ## \ and delimiter
+		$res .= '\\' . $nc;
+            } elsif( $nc =~ /[1-9]/ ){ ## \1 - \9 => ${1} - ${9}
+                $res .= '${' . $nc . '}';
+	    } else { ## everything else (includes &): omit \
+		$res .= $nc;
+	    }
+        } elsif( $c eq '&' ){ ## & => $&
+            $res .= '$&';
+	} elsif( $c =~ /[\$\@$led]/ ){ ## magic in Perl's substitution string
+	    $res .= '\\' . $c;
+        } else {
+	    $res .= $c;
 	}
-	print BODY $_, "\n";
-    }
-    $indent += $indmod;
-    $indmod = 0;
-    if ($redo) {
-	$_ = $redo;
-	$redo = '';
-	redo LINE;
-    }
-}
-if ($lastlinewaslabel++) {
-    $indent += 4;
-    print BODY &tab, ";\n";
-    $indent -= 4;
-}
-
-if ($appendseen || $tseen || !$assumen) {
-    $printit++ if $dseen || (!$assumen && !$assumep);
-    print BODY &q(<<'EOT');
-:	#ifdef SAWNEXT
-:	}
-:	continue {
-:	#endif
-:	#ifdef PRINTIT
-:	#ifdef DSEEN
-:	#ifdef ASSUMEP
-:	    print if $printit++;
-:	#else
-:	    if ($printit)
-:		{ print; }
-:	    else
-:		{ $printit++ unless $nflag; }
-:	#endif
-:	#else
-:	    print if $printit;
-:	#endif
-:	#else
-:	    print;
-:	#endif
-:	#ifdef TSEEN
-:	    $tflag = 0;
-:	#endif
-:	#ifdef APPENDSEEN
-:	    if ($atext) { chop $atext; print $atext; $atext = ''; }
-:	#endif
-EOT
-}
-
-print BODY &q(<<'EOT');
-:	}
-EOT
-
-unless ($debug) {
-
-    print &q(<<"EOT");
-:	$startperl
-:	eval 'exec $perlpath -S \$0 \${1+"\$@"}'
-:		if \$running_under_some_shell;
-:	
-EOT
-    print"$opens\n" if $opens;
-    seek(BODY, 0, 0) || die "Can't rewind temp file: $!\n";
-    while (<BODY>) {
-	/^[ \t]*$/ && next;
-	/^#ifdef (\w+)/ && ((${lc $1} || &skip), next);
-	/^#else/ && (&skip, next);
-	/^#endif/ && next;
-	s/^<><>//;
-	print;
-    }
-}
-
-&Cleanup;
-exit;
-
-sub Cleanup {
-    unlink "/tmp/sperl$$";
-}
-sub Die {
-    &Cleanup;
-    die $_[0];
-}
-sub tab {
-    "\t" x ($indent / 8) . ' ' x ($indent % 8);
-}
-sub make_filehandle {
-    local($_) = $_[0];
-    local($fname) = $_;
-    if (!$seen{$fname}) {
-	$_ = "FH_" . $_ if /^\d/;
-	s/[^a-zA-Z0-9]/_/g;
-	s/^_*//;
-	$_ = "\U$_";
-	if ($fhseen{$_}) {
-	    for ($tmp = "a"; $fhseen{"$_$tmp"}; $a++) {}
-	    $_ .= $tmp;
+    }
+
+    # final cleanup: eliminate raw HTs
+    $res =~ s/\t/\\t/g;
+    return ( $led ? $del : $led ) . $res . ( $led ? $led : $del );
+}
+
+
+sub Parse(){
+    my $error = 0;
+    my( $pdef, $pfil, $plin );
+    for( my $icom = 0; $icom < @Commands; $icom++ ){
+	my $cmd = $Commands[$icom];
+	print "Parse:$cmd:\n" if $useDEBUG;
+	$cmd =~ s/^\s+//;
+	next unless length( $cmd );
+	my $scom = $icom;
+	if( exists( $Defined{$icom} ) ){
+	    $pdef = $Defined{$icom};
+	    if( $pdef =~ /^ #(\d+)/ ){
+		$pfil = 'expression #';
+		$plin = $1;
+	    } else {
+		$pfil = "$pdef l.";
+		$plin = 1;
+            }
+        } else {
+	    $plin++;
+        }
+        my $fl = "$pfil$plin";
+
+        # insert command as comment in gnerated code
+	#
+	$Code .= "# $cmd\n" if $doGenerate;
+
+	# The Address(es)
+	#
+	my( $negated, $naddr, $addr1, $addr2 );
+	$naddr = 0;
+	if(      $cmd =~ s/^(\d+)\s*// ){
+	    $addr1 = "$1"; $naddr++;
+	} elsif( $cmd =~ s/^\$\s*// ){
+	    $addr1 = 'eofARGV()'; $naddr++;
+	} elsif( $cmd =~ s{^(/)}{} || $cmd =~ s{^\\(.)}{} ){
+	    my $del = $1;
+	    my $regex = stripRegex( $del, \$cmd );
+	    if( defined( $regex ) ){
+		$addr1 = 'm '.bre2p( $del, $regex, $fl ).'s';
+		$naddr++;
+	    } else {
+		Warn( "malformed regex, 1st address", $fl );
+		$error++;
+		next;
+	    }
+        }
+        if( defined( $addr1 ) && $cmd =~ s/,\s*// ){
+ 	    if(      $cmd =~ s/^(\d+)\s*// ){
+	        $addr2 = "$1"; $naddr++;
+	    } elsif( $cmd =~ s/^\$\s*// ){
+	        $addr2 = 'eofARGV()'; $naddr++;
+	    } elsif( $cmd =~ s{^(/)}{} || $cmd =~ s{^\\(.)}{} ){
+		my $del = $1;
+	        my $regex = stripRegex( $del, \$cmd );
+		if( defined( $regex ) ){
+		    $addr2 = 'm '. bre2p( $del, $regex, $fl ).'s';
+		    $naddr++;
+		} else {
+		    Warn( "malformed regex, 2nd address", $fl );
+		    $error++;
+		    next;
+		}
+            } else {
+		Warn( "invalid address after `,'", $fl );
+		$error++;
+		next;
+            }
+        }
+
+        # address modifier `!'
+        #
+        $negated = $cmd =~ s/^!\s*//;
+	if( defined( $addr1 ) ){
+	    print "Parse: addr1=$addr1" if $useDEBUG;
+	    if( defined( $addr2 ) ){
+		print ", addr2=$addr2 " if $useDEBUG;
+		# both numeric and addr1 > addr2 => eliminate addr2
+		undef( $addr2 ) if $addr1 =~ /^\d+$/ &&
+                                   $addr2 =~ /^\d+$/ && $addr1 > $addr2;
+	    }
 	}
-	$fhseen{$_} = 1;
-	$opens .= &q(<<"EOT");
-:	open($_, '>$fname') || die "Can't create $fname: \$!";
-EOT
-	$seen{$fname} = $_;
-    }
-    $seen{$fname};
-}
-
-sub make_label {
-    local($label) = @_;
-    $label =~ s/[^a-zA-Z0-9]/_/g;
-    if ($label =~ /^[0-9_]/) { $label = 'L' . $label; }
-    $label = substr($label,0,8);
-
-    # Could be a reserved word, so capitalize it.
-    substr($label,0,1) =~ y/a-z/A-Z/
-      if $label =~ /^[a-z]/;
-
-    $label;
-}
-
-sub transmogrify {
-    {	# case
-	if (/^d/) {
-	    $dseen++;
-	    chop($_ = &q(<<'EOT'));
-:	<<--#ifdef PRINTIT
-:	$printit = 0;
-:	<<--#endif
-:	next LINE;
-EOT
-	    $sawnext++;
+	print 'negated' if $useDEBUG && $negated;
+	print " command:$cmd\n" if $useDEBUG;
+
+	# The Command
+	#
+        if( $cmd !~ s/^([:#={}abcdDgGhHilnNpPqrstwxy])\s*// ){
+	    my $h = substr( $cmd, 0, 1 );
+ 	    Warn( "unknown command `$h'", $fl );
+	    $error++;
 	    next;
 	}
+        my $key = $1;
 
-	if (/^n/) {
-	    chop($_ = &q(<<'EOT'));
-:	<<--#ifdef PRINTIT
-:	<<--#ifdef DSEEN
-:	<<--#ifdef ASSUMEP
-:	print if $printit++;
-:	<<--#else
-:	if ($printit)
-:	    { print; }
-:	else
-:	    { $printit++ unless $nflag; }
-:	<<--#endif
-:	<<--#else
-:	print if $printit;
-:	<<--#endif
-:	<<--#else
-:	print;
-:	<<--#endif
-:	<<--#ifdef APPENDSEEN
-:	if ($atext) {chop $atext; print $atext; $atext = '';}
-:	<<--#endif
-:	$_ = <>;
-:	chop;
-:	<<--#ifdef TSEEN
-:	$tflag = 0;
-:	<<--#endif
-EOT
+	my $tabref = $ComTab{$key};
+	if( $naddr > $tabref->[0] ){
+	    Warn( "excess address(es)", $fl );
+	    $error++;
 	    next;
 	}
 
-	if (/^a/) {
-	    $appendseen++;
-	    $command = $space . "\$atext .= <<'End_Of_Text';\n<<--";
-	    $lastline = 0;
-	    while (<>) {
-		s/^[ \t]*//;
-		s/^[\\]//;
-		unless (s|\\$||) { $lastline = 1;}
-		s/^([ \t]*\n)/<><>$1/;
-		$command .= $_;
-		$command .= '<<--';
-		last if $lastline;
+	my $arg = '';
+	if(      $tabref->[1] eq 'str' ){
+	    # take remainder - don't care if it is empty
+	    $arg = $cmd;
+            $cmd = '';
+
+	} elsif( $tabref->[1] eq 'txt' ){
+	    # multi-line text
+	    my $goon = $cmd =~ /(.*)\\$/;
+	    if( length( $1 ) ){
+		Warn( "extra characters after command ($cmd)", $fl );
+		$error++;
 	    }
-	    $_ = $command . "End_Of_Text";
-	    last;
-	}
-
-	if (/^[ic]/) {
-	    if (/^c/) { $change = 1; }
-	    $addr1 = 1 if $addr1 eq '';
-	    $addr1 = '$iter = (' . $addr1 . ')';
-	    $command = $space .
-	      "    if (\$iter == 1) { print <<'End_Of_Text'; }\n<<--";
-	    $lastline = 0;
-	    while (<>) {
-		s/^[ \t]*//;
-		s/^[\\]//;
-		unless (s/\\$//) { $lastline = 1;}
-		s/'/\\'/g;
-		s/^([ \t]*\n)/<><>$1/;
-		$command .= $_;
-		$command .= '<<--';
-		last if $lastline;
-	    }
-	    $_ = $command . "End_Of_Text";
-	    if ($change) {
-		$dseen++;
-		$change = "$_\n";
-		chop($_ = &q(<<"EOT"));
-:	<<--#ifdef PRINTIT
-:	$space\$printit = 0;
-:	<<--#endif
-:	${space}next LINE;
-EOT
-		$sawnext++;
+	    while( $goon ){
+		$icom++;
+		if( $icom > $#Commands ){
+		    Warn( "unexpected end of script", $fl );
+		    $error++;
+		    last;
+		}
+		$cmd = $Commands[$icom];
+		$Code .= "# $cmd\n" if $doGenerate;
+		$goon = $cmd =~ s/\\$//;
+		$cmd =~ s/\\(.)/$1/g;
+		$arg .= "\n" if length( $arg );
+		$arg .= $cmd;
 	    }
-	    last;
-	}
+	    $arg .= "\n" if length( $arg );
+	    $cmd = '';
 
-	if (/^s/) {
-	    $delim = substr($_,1,1);
-	    $len = length($_);
-	    $repl = $end = 0;
-	    $inbracket = 0;
-	    for ($i = 2; $i < $len; $i++) {
-		$c = substr($_,$i,1);
-		if ($c eq $delim) {
-		    if ($inbracket) {
-			substr($_, $i, 0) = '\\';
-			$i++;
-			$len++;
-		    }
-		    else {
-			if ($repl) {
-			    $end = $i;
-			    last;
-			} else {
-			    $repl = $i;
-			}
-		    }
+	} elsif( $tabref->[1] eq 'sub' ){
+	    # s///
+	    if( ! length( $cmd ) ){
+		Warn( "`s' command requires argument", $fl );
+		$error++;
+		next;
+	    }
+	    if( $cmd =~ s{^([^\\\n])}{} ){
+		my $del = $1;
+		my $regex = stripRegex( $del, \$cmd );
+		if( ! defined( $regex ) ){
+		    Warn( "malformed regular expression", $fl );
+		    $error++;
+		    next;
 		}
-		elsif ($c eq '\\') {
-		    $i++;
-		    if ($i >= $len) {
-			$_ .= 'n';
-			$_ .= <>;
-			$len = length($_);
-			$_ = substr($_,0,--$len);
-		    }
-		    elsif (substr($_,$i,1) =~ /^[n]$/) {
-			;
-		    }
-		    elsif (!$repl &&
-		      substr($_,$i,1) =~ /^[(){}\w]$/) {
-			$i--;
-			$len--;
-			substr($_, $i, 1) = '';
-		    }
-		    elsif (!$repl &&
-		      substr($_,$i,1) =~ /^[<>]$/) {
-			substr($_,$i,1) = 'b';
-		    }
-		    elsif ($repl && substr($_,$i,1) =~ /^\d$/) {
-			substr($_,$i-1,1) = '$';
+		$regex = bre2p( $del, $regex, $fl );
+
+		# a trailing \ indicates embedded NL (in replacement string)
+		while( $cmd =~ s/(?<!\\)\\$/\n/ ){
+		    $icom++;
+		    if( $icom > $#Commands ){
+			Warn( "unexpected end of script", $fl );
+			$error++;
+			last;
 		    }
+		    $cmd .= $Commands[$icom];
+	            $Code .= "# $Commands[$icom]\n" if $doGenerate;
 		}
-		elsif ($c eq '@') {
-		    substr($_, $i, 0) = '\\';
-		    $i++;
-		    $len++;
-		}
-		elsif ($c eq '&' && $repl) {
-		    substr($_, $i, 0) = '$';
-		    $i++;
-		    $len++;
-		}
-		elsif ($c eq '$' && $repl) {
-		    substr($_, $i, 0) = '\\';
-		    $i++;
-		    $len++;
-		}
-		elsif ($c eq '[' && !$repl) {
-		    $i++ if substr($_,$i,1) eq '^';
-		    $i++ if substr($_,$i,1) eq ']';
-		    $inbracket = 1;
-		}
-		elsif ($c eq ']') {
-		    $inbracket = 0;
-		}
-		elsif ($c eq "\t") {
-		    substr($_, $i, 1) = '\\t';
-		    $i++;
-		    $len++;
-		}
-		elsif (!$repl && index("()+",$c) >= 0) {
-		    substr($_, $i, 0) = '\\';
-		    $i++;
-		    $len++;
-		}
-	    }
-	    &Die("Malformed substitution at line $.\n")
-	      unless $end;
-	    $pat = substr($_, 0, $repl + 1);
-	    $repl = substr($_, $repl+1, $end-$repl-1);
-	    $end = substr($_, $end + 1, 1000);
-	    &simplify($pat);
-	    $subst = "$pat$repl$delim";
-	    $cmd = '';
-	    while ($end) {
-		if ($end =~ s/^g//) {
-		    $subst .= 'g';
+
+		my $subst = stripRegex( $del, \$cmd );
+		if( ! defined( $regex ) ){
+		    Warn( "malformed substitution expression", $fl );
+		    $error++;
 		    next;
 		}
-		if ($end =~ s/^p//) {
-		    $cmd .= ' && (print)';
+		$subst = sub2p( $del, $subst, $fl );
+
+		# parse s/// modifier: g|p|0-9|w <file>
+		my( $global, $nmatch, $print, $write ) =
+		  ( '',      '',      0,      undef );
+		while( $cmd =~ s/^([gp0-9])// ){
+		    $1 eq 'g' ? ( $global = 'g' ) :
+  		    $1 eq 'p' ? ( $print  = $1  ) : ( $nmatch .= $1 );
+                }
+		$write = $1 if $cmd =~ s/w\s*(.*)$//;
+  	        ### $nmatch =~ s/^(\d)\1*$/$1/; ### may be dangerous?
+		if( $global && length( $nmatch ) || length( $nmatch ) > 1 ){
+		    Warn( "conflicting flags `$global$nmatch'", $fl );
+		    $error++;
 		    next;
 		}
-		if ($end =~ s/^w[ \t]*//) {
-		    $fh = &make_filehandle($end);
-		    $cmd .= " && (print $fh \$_)";
-		    $end = '';
+
+		$arg = makes( $regex, $subst,
+			      $write, $global, $print, $nmatch, $fl );
+		if( ! defined( $arg ) ){
+		    $error++;
 		    next;
 		}
-		&Die("Unrecognized substitution command".
-		  "($end) at line $.\n");
+
+            } else {
+		Warn( "improper delimiter in s command", $fl );
+		$error++;
+		next;
+            }
+
+	} elsif( $tabref->[1] eq 'tra' ){
+	    # y///
+	    # a trailing \ indicates embedded newline
+	    while( $cmd =~ s/(?<!\\)\\$/\n/ ){
+		$icom++;
+		if( $icom > $#Commands ){
+		    Warn( "unexpected end of script", $fl );
+		    $error++;
+		    last;
+		}
+		$cmd .= $Commands[$icom];
+                $Code .= "# $Commands[$icom]\n" if $doGenerate;
+	    }
+	    if( ! length( $cmd ) ){
+		Warn( "`y' command requires argument", $fl );
+		$error++;
+		next;
+	    }
+	    my $d = substr( $cmd, 0, 1 ); $cmd = substr( $cmd, 1 );
+	    if( $d eq '\\' ){
+		Warn( "`\\' not valid as delimiter in `y' command", $fl );
+		$error++;
+		next;
+	    }
+	    my $fr = stripTrans( $d, \$cmd );
+	    if( ! defined( $fr ) || ! length( $cmd ) ){
+		Warn( "malformed `y' command argument", $fl );
+		$error++;
+		next;
+	    }
+	    my $to = stripTrans( $d, \$cmd );
+	    if( ! defined( $to ) ){
+		Warn( "malformed `y' command argument", $fl );
+		$error++;
+		next;
+	    }
+	    if( length($fr) != length($to) ){
+		Warn( "string lengths in `y' command differ", $fl );
+		$error++;
+		next;
+	    }
+	    if( ! defined( $arg = makey( $fr, $to, $fl ) ) ){
+		$error++;
+		next;
 	    }
-	    chop ($_ = &q(<<"EOT"));
-:	<<--#ifdef TSEEN
-:	$subst && \$tflag++$cmd;
-:	<<--#else
-:	$subst$cmd;
-:	<<--#endif
-EOT
-	    next;
-	}
 
-	if (/^p/) {
-	    $_ = 'print;';
-	    next;
 	}
 
-	if (/^w/) {
-	    s/^w[ \t]*//;
-	    $fh = &make_filehandle($_);
-	    $_ = "print $fh \$_;";
-	    next;
+	# $cmd must be now empty - exception is {
+	if( $cmd !~ /^\s*$/ ){
+	    if( $key eq '{' ){
+		# dirty hack to process command on '{' line
+		$Commands[$icom--] = $cmd;
+	    } else {
+		Warn( "extra characters after command ($cmd)", $fl );
+		$error++;
+		next;
+	    }
 	}
 
-	if (/^r/) {
-	    $appendseen++;
-	    s/^r[ \t]*//;
-	    $file = $_;
-	    $_ = "\$atext .= `cat $file 2>/dev/null`;";
-	    next;
+	# Make Code
+        #
+	if( &{$tabref->[2]}( $addr1, $addr2, $negated,
+                             $tabref->[3], $arg, $fl ) ){
+	    $error++;
 	}
+    }
 
-	if (/^P/) {
-	    $_ = 'print $1 if /^(.*)/;';
-	    next;
-	}
+    while( @BlockStack ){
+	my $bl = pop( @BlockStack );
+	Warn( "start of unterminated `{'", $bl );
+        $error++;
+    }
 
-	if (/^D/) {
-	    chop($_ = &q(<<'EOT'));
-:	s/^.*\n?//;
-:	redo LINE if $_;
-:	next LINE;
-EOT
-	    $sawnext++;
-	    next;
+    for my $lab ( keys( %Label ) ){
+	if( ! exists( $Label{$lab}{defined} ) ){
+	    for my $used ( @{$Label{$lab}{used}} ){
+ 	        Warn( "undefined label `$lab'", $used );
+	        $error++;
+	    }
 	}
+    }
 
-	if (/^N/) {
-	    chop($_ = &q(<<'EOT'));
-:	$_ .= "\n";
-:	$len1 = length;
-:	$_ .= <>;
-:	chop if $len1 < length;
-:	<<--#ifdef TSEEN
-:	$tflag = 0;
-:	<<--#endif
-EOT
-	    next;
-	}
+    exit( 1 ) if $error;
+}
 
-	if (/^h/) {
-	    $_ = '$hold = $_;';
-	    next;
-	}
 
-	if (/^H/) {
-	    $_ = '$hold .= "\n", $hold .= $_;';
-	    next;
-	}
+##############
+#### MAIN ####
+##############
 
-	if (/^g/) {
-	    $_ = '$_ = $hold;';
-	    next;
-	}
+sub usage(){
+    print STDERR "Usage: sed [-an] command [file...]\n";
+    print STDERR "           [-an] [-e command] [-f script-file] [file...]\n";
+}
 
-	if (/^G/) {
-	    $_ = '$_ .= "\n", $_ .= $hold;';
-	    next;
+###################
+# Here we go again...
+#
+my $expr = 0;
+while( @ARGV && $ARGV[0] =~ /^-(.)(.*)$/ ){
+    my $opt = $1;
+    my $arg = $2;
+    shift( @ARGV );
+    if(      $opt eq 'e' ){
+        if( length( $arg ) ){
+	    push( @Commands, split( "\n", $arg ) );
+        } elsif( @ARGV ){
+	    push( @Commands, shift( @ARGV ) ); 
+        } else {
+            Warn( "option -e requires an argument" );
+            usage();
+            exit( 1 );
+        }
+	$expr++;
+        $Defined{$#Commands} = " #$expr";
+	next;
+    }
+    if( $opt eq 'f' ){
+        my $path;
+        if( length( $arg ) ){
+	    $path = $arg;
+        } elsif( @ARGV ){
+	    $path = shift( @ARGV ); 
+        } else {
+            Warn( "option -f requires an argument" );
+            usage();
+            exit( 1 );
+        }
+	my $fst = $#Commands + 1;
+        open( SCRIPT, "<$path" ) || die( "$0: $path: could not open ($!)\n" );
+        my $cmd;
+        while( defined( $cmd = <SCRIPT> ) ){
+            chomp( $cmd );
+            push( @Commands, $cmd );
+        }
+        close( SCRIPT );
+	if( $#Commands >= $fst ){
+	    $Defined{$fst} = "$path";
 	}
+	next;
+    }
+    if( $opt eq '-' && $arg eq '' ){
+	last;
+    }
+    if( $opt eq 'h' || $opt eq '?' ){
+        usage();
+        exit( 0 );
+    }
+    if( $opt eq 'n' ){
+	$doAutoPrint = 0;
+    } elsif( $opt eq 'a' ){
+	$doOpenWrite = 0;
+    } else {
+        Warn( "illegal option `$opt'" );
+        usage();
+        exit( 1 );
+    }
+    if( length( $arg ) ){
+	unshift( @ARGV, "-$arg" );
+    }
+}
 
-	if (/^x/) {
-	    $_ = '($_, $hold) = ($hold, $_);';
-	    next;
-	}
+# A singleton command may be the 1st argument when there are no options.
+#
+if( @Commands == 0 ){
+    if( @ARGV == 0 ){
+        Warn( "no script command given" );
+        usage();
+        exit( 1 );
+    }
+    push( @Commands, split( "\n", shift( @ARGV ) ) );
+    $Defined{0} = ' #1';
+}
 
-	if (/^b$/) {
-	    $_ = 'next LINE;';
-	    $sawnext++;
-	    next;
-	}
+print STDERR "Files: @ARGV\n" if $useDEBUG;
 
-	if (/^b/) {
-	    s/^b[ \t]*//;
-	    $lab = &make_label($_);
-	    if ($lab eq $toplabel) {
-		$_ = 'redo LINE;';
-	    } else {
-		$_ = "goto $lab;";
-	    }
-	    next;
-	}
+# generate leading code
+#
+    $Code = <<'[TheEnd]';
+
+sub openARGV(){
+    unshift( @ARGV, '-' ) unless @ARGV;
+    my $file = shift( @ARGV );
+    open( ARG, "<$file" )
+    || die( "$0: can't open $file for reading ($!)\n" );
+    $isEOF = 0;
+}
 
-	if (/^t$/) {
-	    $_ = 'next LINE if $tflag;';
-	    $sawnext++;
-	    $tseen++;
-	    next;
-	}
+sub getsARGV(;\$){
+    my $argref = @_ ? shift() : \$_; 
+    while( $isEOF || ! defined( $$argref = <ARG> ) ){
+	close( ARG );
+	return 0 unless @ARGV;
+	my $file = shift( @ARGV );
+	open( ARG, "<$file" )
+	|| die( "$0: can't open $file for reading ($!)\n" );
+	$isEOF = 0;
+    }
+    1;
+}
 
-	if (/^t/) {
-	    s/^t[ \t]*//;
-	    $lab = &make_label($_);
-	    $_ = q/if ($tflag) {$tflag = 0; /;
-	    if ($lab eq $toplabel) {
-		$_ .= 'redo LINE;}';
-	    } else {
-		$_ .= "goto $lab;}";
-	    }
-	    $tseen++;
-	    next;
-	}
+sub eofARGV(){
+    return @ARGV == 0 && ( $isEOF = eof( ARG ) );
+}
 
-	if (/^y/) {
-	    s/abcdefghijklmnopqrstuvwxyz/a-z/g;
-	    s/ABCDEFGHIJKLMNOPQRSTUVWXYZ/A-Z/g;
-	    s/abcdef/a-f/g;
-	    s/ABCDEF/A-F/g;
-	    s/0123456789/0-9/g;
-	    s/01234567/0-7/g;
-	    $_ .= ';';
+sub makeHandle($){
+    my( $path ) = @_;
+    my $handle;
+    if( ! exists( $wFiles{$path} ) || $wFiles{$path} eq '' ){
+        $handle = $wFiles{$path} = gensym();
+	if( $doOpenWrite ){
+	    if( ! open( $handle, ">$path" ) ){
+		die( "$0: can't open $path for writing: ($!)\n" );
+	    }
 	}
+    } else {
+        $handle = $wFiles{$path};
+    }
+    return $handle;
+}
 
-	if (/^=/) {
-	    $_ = 'print $.;';
-	    next;
-	}
+sub _r($){
+    my $path = shift();
+    push( @Q, \$path );
+}
 
-	if (/^q/) {
-	    chop($_ = &q(<<'EOT'));
-:	close(ARGV);
-:	@ARGV = ();
-:	next LINE;
-EOT
-	    $sawnext++;
-	    next;
-	}
-    } continue {
-	if ($space) {
-	    s/^/$space/;
-	    s/(\n)(.)/$1$space$2/g;
+sub _l(){        
+    my $h = $_;
+    my $mcpl = 70;
+    $h =~ s/\\/\\\\/g;
+    if( $h =~ /[^[:print:]]/ ){
+	$h =~ s/\a/\\a/g;
+	$h =~ s/\f/\\f/g;
+	$h =~ s/\n/\\n/g;
+	$h =~ s/\t/\\t/g;
+	$h =~ s/\r/\\r/g;
+	$h =~ s/\e/\\e/g;
+        $h =~ s/([^[:print:]])/sprintf("\\%03o", ord($1))/ge;
+    }
+    while( length( $h ) > $mcpl ){
+	my $l = substr( $h, 0, $mcpl-1 );
+	$h = substr( $h, $mcpl );
+	# remove incomplete \-escape from end of line
+	if( $l =~ s/(?<!\\)(\\[0-7]{0,2})$// ){
+	    $h = $1 . $h;
 	}
-	last;
+	print $l, "\\\n";
     }
-    $_;
+    print "$h\$\n";
 }
 
-sub fetchpat {
-    local($outer) = @_;
-    local($addr) = $outer;
-    local($inbracket);
-    local($prefix,$delim,$ch);
+sub _w($){
+    my $path   = shift();
+    my $handle = $wFiles{$path};
+    if( ! $doOpenWrite &&
+	! defined( fileno( $handle ) ) ){
+	open( $handle, ">$path" )
+	|| die( "$0: $path: cannot open ($!)\n" );
+    }
+    print $handle $_, "\n";
+}
 
-    # Process pattern one potential delimiter at a time.
+# condition register test/reset
+#
+sub _t(){
+    my $res = $CondReg;
+    $CondReg = 0;
+    $res;
+}
 
-    DELIM: while (s#^([^\]+(|)[\\/]*)([]+(|)[\\/])##) {
-	$prefix = $1;
-	$delim = $2;
-	if ($delim eq '\\') {
-	    s/(.)//;
-	    $ch = $1;
-	    $delim = '' if $ch =~ /^[(){}A-Za-mo-z]$/;
-	    $ch = 'b' if $ch =~ /^[<>]$/;
-	    $delim .= $ch;
-	}
-	elsif ($delim eq '[') {
-	    $inbracket = 1;
-	    s/^\^// && ($delim .= '^');
-	    s/^]// && ($delim .= ']');
-	}
-	elsif ($delim eq ']') {
-	    $inbracket = 0;
-	}
-	elsif ($inbracket || $delim ne $outer) {
-	    $delim = '\\' . $delim;
+# printQ
+#
+sub printQ(){
+    for my $q ( @Q ){
+	if( ref( $q ) ){
+	    if( exists( $wFiles{$$q} ) && $wFiles{$$q} ne '' ){
+		open( $wFiles{$$q}, ">>$$q" );
+	    }
+	    if( open( RF, "<$$q" ) ){
+		my $line;
+		while( defined( $line = <RF> ) ){
+		    print $line;
+		}
+		close( RF );
+	    }
+	} else {
+	    print $q;
 	}
-	$addr .= $prefix;
-	$addr .= $delim;
-	if ($delim eq $outer && !$inbracket) {
-	    last DELIM;
+    }
+    undef( @Q );
+}
+
+sub Run(){
+    my( $h, $icnt, $s, $n );
+    # hack (not unbreakable :-/) to avoid // matching an empty string
+    my $z = "\000"; $z =~ /$z/;
+    # Initialize.
+    openARGV();
+    $Hold    = '';
+    $CondReg = 0;
+    $doPrint = $doAutoPrint;
+CYCLE:
+    while( getsARGV() ){
+	chomp();
+	$CondReg = 0;   # cleared on t
+BOS:;
+[TheEnd]
+
+    # parse - avoid opening files when doing s2p
+    #
+    ( $svOpenWrite, $doOpenWrite ) = (  $doOpenWrite, $svOpenWrite )
+      if $doGenerate;
+    Parse();
+    ( $svOpenWrite, $doOpenWrite ) = (  $doOpenWrite, $svOpenWrite )
+      if $doGenerate;
+
+    # append trailing code
+    #
+    $Code .= <<'[TheEnd]';
+EOS:    if( $doPrint ){
+            print $_, "\n";
+        } else {
+	    $doPrint = $doAutoPrint;
 	}
+        printQ() if @Q;
     }
-    $addr =~ s/\t/\\t/g;
-    $addr =~ s/\@/\\@/g;
-    &simplify($addr);
-    $addr;
+
+    exit( 0 );
 }
+[TheEnd]
 
-sub q {
-    local($string) = @_;
-    local($*) = 1;
-    $string =~ s/^:\t?//g;
-    $string;
+# magic "#n" - same as -n option
+#
+$doAutoPrint = 0 if substr( $Commands[0], 0, 2 ) eq '#n';
+
+# eval code - check for errors
+#
+print "Code:\n$Code" if $useDEBUG;
+eval $Code;
+if( $@ ){
+    print "Code:\n$Code";
+    die( "$0: internal error - generated incorrect Perl code: $@\n" );
 }
 
-sub simplify {
-    $_[0] =~ s/_a-za-z0-9/\\w/ig;
-    $_[0] =~ s/a-z_a-z0-9/\\w/ig;
-    $_[0] =~ s/a-za-z_0-9/\\w/ig;
-    $_[0] =~ s/a-za-z0-9_/\\w/ig;
-    $_[0] =~ s/_0-9a-za-z/\\w/ig;
-    $_[0] =~ s/0-9_a-za-z/\\w/ig;
-    $_[0] =~ s/0-9a-z_a-z/\\w/ig;
-    $_[0] =~ s/0-9a-za-z_/\\w/ig;
-    $_[0] =~ s/\[\\w\]/\\w/g;
-    $_[0] =~ s/\[^\\w\]/\\W/g;
-    $_[0] =~ s/\[0-9\]/\\d/g;
-    $_[0] =~ s/\[^0-9\]/\\D/g;
-    $_[0] =~ s/\\d\\d\*/\\d+/g;
-    $_[0] =~ s/\\D\\D\*/\\D+/g;
-    $_[0] =~ s/\\w\\w\*/\\w+/g;
-    $_[0] =~ s/\\t\\t\*/\\t+/g;
-    $_[0] =~ s/(\[.[^]]*\])\1\*/$1+/g;
-    $_[0] =~ s/([\w\s!@#%^&-=,:;'"])\1\*/$1+/g;
+if( $doGenerate ){
+
+    # write full Perl program
+    #
+ 
+    # bang line, declarations
+    print <<TheEnd;
+#!$perlpath -w
+eval 'exec $perlpath -S \$0 \${1+"\$@"}'
+  if 0;
+\$0 =~ s/^.*?(\\w+)\$/\$1/;
+
+use strict;
+use Symbol;
+use vars qw{ \$isEOF \$Hold \%wFiles \@Q \$CondReg
+	     \$doAutoPrint \$doOpenWrite \$doPrint };
+\$doAutoPrint = $doAutoPrint;
+\$doOpenWrite = $doOpenWrite;
+TheEnd
+
+    my $wf = "'" . join( "', '",  keys( %wFiles ) ) . "'";
+    if( $wf ne "''" ){
+	print <<TheEnd;
+sub makeHandle(\$);
+for my \$p ( $wf ){
+   exit( 1 ) unless makeHandle( \$p );
 }
+TheEnd
+   }
 
-sub skip {
-    local($level) = 0;
+   print $Code;
+   print "&Run()\n";
+   exit( 0 );
 
-    while(<BODY>) {
-	/^#ifdef/ && $level++;
-	/^#else/  && !$level && return;
-	/^#endif/ && !$level-- && return;
-    }
+} else {
+
+    # execute: make handles (and optionally open) all w files; run!
 
-    die "Unterminated `#ifdef' conditional\n";
+    for my $p ( keys( %wFiles ) ){
+        exit( 1 ) unless makeHandle( $p );
+    }
+    &Run();
 }
+
+
+=head1 ENVIRONMENT
+
+The environment variable C<PSEDEXTBRE> may be set to extend BREs.
+See L<"Additional Atoms">.
+
+=head1 DIAGNOSTICS
+
+=over 4
+
+=item ambiguos translation for character `%s' in `y' command
+
+The indicated character appears twice, with different translations.
+
+=item `[' cannot be last in pattern
+
+A `[' in a BRE indicates the beginning of a I<bracket expression>.
+
+=item `\' cannot be last in pattern
+
+A `\' in a BRE is used to make the subsequent character literal.
+
+=item `\' cannot be last in substitution
+
+A `\' in a subsitution string is used to make the subsequent character literal.
+
+=item conflicting flags `%s'
+
+In an B<s> command, either the `g' flag and an n-th occurrence flag, or
+multiple n-th occurrence flags are specified. Note that only the digits
+`1' through `9' are permitted.
+
+=item duplicate label %s (first defined at %s)
+
+=item excess address(es)
+
+The command has more than the permitted number of addresses.
+
+=item extra characters after command (%s)
+
+=item illegal option `%s'
+
+=item improper delimiter in s command
+
+The BRE and substitution may not be delimited with `\' or newline.
+
+=item invalid address after `,'
+
+=item invalid backreference (%s)
+
+The specified backreference number exceeds the number of backreferences
+in the BRE.
+
+=item invalid repeat clause `\{%s\}'
+
+The repeat clause does not contain a valid integer value, or pair of
+values.
+
+=item malformed regex, 1st address
+
+=item malformed regex, 2nd address
+
+=item malformed regular expression
+
+=item malformed substitution expression
+
+=item malformed `y' command argument
+
+The first or second string of a B<y> command  is syntactically incorrect.
+
+=item maximum less than minimum in `\{%s\}'
+
+=item no script command given
+
+There must be at least one B<-e> or one B<-f> option specifying a
+script or script file.
+
+=item `\' not valid as delimiter in `y' command
+
+=item option -e requires an argument
+
+=item option -f requires an argument
+
+=item `s' command requires argument
+
+=item start of unterminated `{'
+
+=item string lengths in `y' command differ
+
+The translation table strings in a B<y> commanf must have equal lengths.
+
+=item undefined label `%s'
+
+=item unexpected `}'
+
+A B<}> command without a preceding B<{> command was encountered.
+
+=item unexpected end of script
+
+The end of the script was reached although a text line after a
+B<a>, B<c> or B<i> command indicated another line.
+
+=item unknown command `%s'
+
+=item unterminated `['
+
+A BRE contains an unterminated bracket expression.
+
+=item unterminated `\('
+
+A BRE contains an unterminated backreference.
+
+=item `\{' without closing `\}'
+
+A BRE contains an unterminated bounds specification.
+
+=item `\)' without preceding `\('
+
+=item `y' command requires argument
+
+=back
+
+=head1 EXAMPLE
+
+The basic material for the preceding section was generated by running
+the sed script
+
+   #no autoprint
+   s/^.*Warn( *"\([^"]*\)".*$/\1/
+   t process
+   b
+   :process
+   s/$!/%s/g
+   s/$[_[:alnum:]]\{1,\}/%s/g
+   s/\\\\/\\/g
+   s/^/=item /
+   p
+
+on the program's own text, and piping the output into C<sort -u>.
+
+
+=head1 SED SCRIPT TRANSLATION
+
+If this program is invoked with the name F<s2p> it will act as a
+sed-to-Perl translator. After option processing (all other
+arguments are ignored), a Perl program is printed on standard
+output, which will process the input stream (as read from all
+arguments) in the way defined by the sed script and the option setting
+used for the translation.
+
+=head1 SEE ALSO
+
+perl(1), re_format(7)
+
+=head1 BUGS
+
+The B<l> command will show escape characters (ESC) as `C<\e>', but
+a vertical tab (VT) in octal.
+
+Trailing spaces are truncated from labels in B<:>, B<t> and B<b> commands.
+
+The meaning of an empty regular expression (`C<//>'), as defined by B<sed>,
+is "the last pattern used, at run time". This deviates from the Perl
+interpretation, which will re-use the "last last successfully executed
+regular expression". Since keeping track of pattern usage would create
+terribly cluttered code, and differences would only appear in obscure
+context (where other B<sed> implementations appear to deviate, too),
+the Perl semantics was adopted. Note that common usage of this feature,
+such as in C</abc/s//xyz/>, will work as expected.
+
+Collating elements (of bracket expressions in BREs) are not implemented.
+
+=head1 STANDARDS
+
+This B<sed> implementation conforms to the IEEE Std1003.2-1992 ("POSIX.2")
+definition of B<sed>, and is compatible with the I<OpenBSD>
+implementation, except where otherwise noted (see L<"BUGS">).
+
+=head1 AUTHOR
+
+This Perl implementation of I<sed> was written by Wolfgang Laun,
+I<Wolfgang.Laun@alcatel.at>.
+
+=head1 COPYRIGHT and LICENSE
+
+This program is free and open software. You may use, modify,
+distribute, and sell this program (and any modified variants) in any
+way you wish, provided you do not restrict others from doing the same.
+
+=cut
+
 !NO!SUBS!
 
 close OUT or die "Can't close $file: $!";
author	Wolfgang Laun <Wolfgang.Laun@alcatel.at>	2000-08-28 16:05:12 +0200
committer	Jarkko Hietaniemi <jhi@iki.fi>	2000-09-08 14:34:18 +0000
commit	86a59229552fc742ed09795227799fb41537f06f (patch)
tree	996057e7273840293fd520f949ed9563c37677e4 /x2p
parent	33b454808819084359e76a3f223a41b842c180b7 (diff)
download	perl-86a59229552fc742ed09795227799fb41537f06f.tar.gz