diff options
Diffstat (limited to 'x2p/s2p.PL')
-rw-r--r-- | x2p/s2p.PL | 2072 |
1 files changed, 0 insertions, 2072 deletions
diff --git a/x2p/s2p.PL b/x2p/s2p.PL deleted file mode 100644 index 8a5abaeca8..0000000000 --- a/x2p/s2p.PL +++ /dev/null @@ -1,2072 +0,0 @@ -#!/usr/bin/perl - -use Config; -use File::Basename qw(&basename &dirname); -use Cwd; -use subs qw(link); - -sub link { # This is a cut-down version of installperl:link(). - my($from,$to) = @_; - my($success) = 0; - - eval { - CORE::link($from, $to) - ? $success++ - : ($from =~ m#^/afs/# || $to =~ m#^/afs/#) - ? die "AFS" # okay inside eval {} - : die "Couldn't link $from to $to: $!\n"; - }; - if ($@) { - require File::Copy; - File::Copy::copy($from, $to) - ? $success++ - : warn "Couldn't copy $from to $to: $!\n"; - } - $success; -} - -# List explicitly here the variables you want Configure to -# generate. Metaconfig only looks for shell variables, so you -# have to mention them as if they were shell variables, not -# %Config entries. Thus you write -# $startperl -# to ensure Configure will look for $Config{startperl}. - -# This forces PL files to create target in same directory as PL file. -# This is so that make depend always knows where to find PL derivatives. -$origdir = cwd; -chdir dirname($0); -$file = basename($0, '.PL'); -$file .= '.com' if $^O eq 'VMS'; - -open OUT,">$file" or die "Can't create $file: $!"; - -print "Extracting $file (with variable substitutions)\n"; - -# In this section, perl variables will be expanded during extraction. -# You can use $Config{...} to use Configure variables. - -print OUT <<"!GROK!THIS!"; -$Config{startperl} - eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}' - if \$running_under_some_shell; -my \$startperl; -my \$perlpath; -(\$startperl = <<'/../') =~ s/\\s*\\z//; -$Config{startperl} -/../ -(\$perlpath = <<'/../') =~ s/\\s*\\z//; -$Config{perlpath} -/../ -!GROK!THIS! - -# In the following, perl variables are not expanded during extraction. - -print OUT <<'!NO!SUBS!'; - -$0 =~ s/^.*?(\w+)[\.\w]*$/$1/; - -# (p)sed - a stream editor -# History: Aug 12 2000: Original version. -# Mar 25 2002: Rearrange generated Perl program. -# Jul 23 2007: Fix bug in regex stripping (M.Thorland) - -use strict; -use integer; -use Symbol; - -=head1 NAME - -psed - a stream editor - -=head1 SYNOPSIS - - psed [-an] script [file ...] - psed [-an] [-e script] [-f script-file] [file ...] - - s2p [-an] [-e script] [-f script-file] - -=head1 DESCRIPTION - -A stream editor reads the input stream consisting of the specified files -(or standard input, if none are given), processes is line by line by -applying a script consisting of edit commands, and writes resulting lines -to standard output. The filename 'C<->' may be used to read standard input. - -The edit script is composed from arguments of B<-e> options and -script-files, in the given order. A single script argument may be specified -as the first parameter. - -If this program is invoked with the name F<s2p>, it will act as a -sed-to-Perl translator. See L<"SED SCRIPT TRANSLATION">. - -B<sed> returns an exit code of 0 on success or >0 if an error occurred. - -=head1 OPTIONS - -=over 4 - -=item B<-a> - -A file specified as argument to the B<w> edit command is by default -opened before input processing starts. Using B<-a>, opening of such -files is delayed until the first line is actually written to the file. - -=item B<-e> I<script> - -The editing commands defined by I<script> are appended to the script. -Multiple commands must be separated by newlines. - -=item B<-f> I<script-file> - -Editing commands from the specified I<script-file> are read and appended -to the script. - -=item B<-n> - -By default, a line is written to standard output after the editing script -has been applied to it. The B<-n> option suppresses automatic printing. - -=back - -=head1 COMMANDS - -B<sed> command syntax is defined as - -Z<> Z<> Z<> Z<>[I<address>[B<,>I<address>]][B<!>]I<function>[I<argument>] - -with whitespace being permitted before or after addresses, and between -the function character and the argument. The I<address>es and the -address inverter (C<!>) are used to restrict the application of a -command to the selected line(s) of input. - -Each command must be on a line of its own, except where noted in -the synopses below. - -The edit cycle performed on each input line consist of reading the line -(without its trailing newline character) into the I<pattern space>, -applying the applicable commands of the edit script, writing the final -contents of the pattern space and a newline to the standard output. -A I<hold space> is provided for saving the contents of the -pattern space for later use. - -=head2 Addresses - -A sed address is either a line number or a pattern, which may be combined -arbitrarily to construct ranges. Lines are numbered across all input files. - -Any address may be followed by an exclamation mark ('C<!>'), selecting -all lines not matching that address. - -=over 4 - -=item I<number> - -The line with the given number is selected. - -=item B<$> - -A dollar sign (C<$>) is the line number of the last line of the input stream. - -=item B</>I<regular expression>B</> - -A pattern address is a basic regular expression (see -L<"BASIC REGULAR EXPRESSIONS">), between the delimiting character C</>. -Any other character except C<\> or newline may be used to delimit a -pattern address when the initial delimiter is prefixed with a -backslash ('C<\>'). - -=back - -If no address is given, the command selects every line. - -If one address is given, it selects the line (or lines) matching the -address. - -Two addresses select a range that begins whenever the first address -matches, and ends (including that line) when the second address matches. -If the first (second) address is a matching pattern, the second -address is not applied to the very same line to determine the end of -the range. Likewise, if the second address is a matching pattern, the -first address is not applied to the very same line to determine the -begin of another range. If both addresses are line numbers, -and the second line number is less than the first line number, then -only the first line is selected. - - -=head2 Functions - -The maximum permitted number of addresses is indicated with each -function synopsis below. - -The argument I<text> consists of one or more lines following the command. -Embedded newlines in I<text> must be preceded with a backslash. Other -backslashes in I<text> are deleted and the following character is taken -literally. - -=over 4 - -=cut - -my %ComTab; -my %GenKey; -#-------------------------------------------------------------------------- -$ComTab{'a'}=[ 1, 'txt', \&Emit, '{ push( @Q, <<'."'TheEnd' ) }\n" ]; #ok - -=item [1addr]B<a\> I<text> - -Write I<text> (which must start on the line following the command) -to standard output immediately before reading the next line -of input, either by executing the B<N> function or by beginning a new cycle. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'b'}=[ 2, 'str', \&Branch, '{ goto XXX; }' ]; #ok - -=item [2addr]B<b> [I<label>] - -Branch to the B<:> function with the specified I<label>. If no label -is given, branch to the end of the script. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'c'}=[ 2, 'txt', \&Change, <<'-X-' ]; #ok -{ print <<'TheEnd'; } $doPrint = 0; goto EOS; --X- -### continue OK => next CYCLE; - -=item [2addr]B<c\> I<text> - -The line, or range of lines, selected by the address is deleted. -The I<text> (which must start on the line following the command) -is written to standard output. With an address range, this occurs at -the end of the range. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'d'}=[ 2, '', \&Emit, <<'-X-' ]; #ok -{ $doPrint = 0; - goto EOS; -} --X- -### continue OK => next CYCLE; - -=item [2addr]B<d> - -Deletes the pattern space and starts the next cycle. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'D'}=[ 2, '', \&Emit, <<'-X-' ]; #ok -{ s/^.*\n?//; - if(length($_)){ goto BOS } else { goto EOS } -} --X- -### continue OK => next CYCLE; - -=item [2addr]B<D> - -Deletes the pattern space through the first embedded newline or to the end. -If the pattern space becomes empty, a new cycle is started, otherwise -execution of the script is restarted. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'g'}=[ 2, '', \&Emit, '{ $_ = $Hold };' ]; #ok - -=item [2addr]B<g> - -Replace the contents of the pattern space with the hold space. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'G'}=[ 2, '', \&Emit, '{ $_ .= "\n"; $_ .= $Hold };' ]; #ok - -=item [2addr]B<G> - -Append a newline and the contents of the hold space to the pattern space. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'h'}=[ 2, '', \&Emit, '{ $Hold = $_ }' ]; #ok - -=item [2addr]B<h> - -Replace the contents of the hold space with the pattern space. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'H'}=[ 2, '', \&Emit, '{ $Hold .= "\n"; $Hold .= $_; }' ]; #ok - -=item [2addr]B<H> - -Append a newline and the contents of the pattern space to the hold space. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'i'}=[ 1, 'txt', \&Emit, '{ print <<'."'TheEnd' }\n" ]; #ok - -=item [1addr]B<i\> I<text> - -Write the I<text> (which must start on the line following the command) -to standard output. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'l'}=[ 2, '', \&Emit, '{ _l() }' ]; #okUTF8 - -=item [2addr]B<l> - -Print the contents of the pattern space: non-printable characters are -shown in C-style escaped form; long lines are split and have a trailing -^'C<\>' at the point of the split; the true end of a line is marked with -a 'C<$>'. Escapes are: '\a', '\t', '\n', '\f', '\r', '\e' for -BEL, HT, LF, FF, CR, ESC, respectively, and '\' followed by a three-digit -octal number for all other non-printable characters. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'n'}=[ 2, '', \&Emit, <<'-X-' ]; #ok -{ print $_, "\n" if $doPrint; - printQ() if @Q; - $CondReg = 0; - last CYCLE unless getsARGV(); - chomp(); -} --X- - -=item [2addr]B<n> - -If automatic printing is enabled, write the pattern space to the standard -output. Replace the pattern space with the next line of input. If -there is no more input, processing is terminated. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'N'}=[ 2, '', \&Emit, <<'-X-' ]; #ok -{ printQ() if @Q; - $CondReg = 0; - last CYCLE unless getsARGV( $h ); - chomp( $h ); - $_ .= "\n$h"; -} --X- - -=item [2addr]B<N> - -Append a newline and the next line of input to the pattern space. If -there is no more input, processing is terminated. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'p'}=[ 2, '', \&Emit, '{ print $_, "\n"; }' ]; #ok - -=item [2addr]B<p> - -Print the pattern space to the standard output. (Use the B<-n> option -to suppress automatic printing at the end of a cycle if you want to -avoid double printing of lines.) - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'P'}=[ 2, '', \&Emit, <<'-X-' ]; #ok -{ if( /^(.*)/ ){ print $1, "\n"; } } --X- - -=item [2addr]B<P> - -Prints the pattern space through the first embedded newline or to the end. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'q'}=[ 1, '', \&Emit, <<'-X-' ]; #ok -{ print $_, "\n" if $doPrint; - last CYCLE; -} --X- - -=item [1addr]B<q> - -Branch to the end of the script and quit without starting a new cycle. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'r'}=[ 1, 'str', \&Emit, "{ _r( '-X-' ) }" ]; #ok - -=item [1addr]B<r> I<file> - -Copy the contents of the I<file> to standard output immediately before -the next attempt to read a line of input. Any error encountered while -reading I<file> is silently ignored. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'s'}=[ 2, 'sub', \&Emit, '' ]; #ok - -=item [2addr]B<s/>I<regular expression>B</>I<replacement>B</>I<flags> - -Substitute the I<replacement> string for the first substring in -the pattern space that matches the I<regular expression>. -Any character other than backslash or newline can be used instead of a -slash to delimit the regular expression and the replacement. -To use the delimiter as a literal character within the regular expression -and the replacement, precede the character by a backslash ('C<\>'). - -Literal newlines may be embedded in the replacement string by -preceding a newline with a backslash. - -Within the replacement, an ampersand ('C<&>') is replaced by the string -matching the regular expression. The strings 'C<\1>' through 'C<\9>' are -replaced by the corresponding subpattern (see L<"BASIC REGULAR EXPRESSIONS">). -To get a literal 'C<&>' or 'C<\>' in the replacement text, precede it -by a backslash. - -The following I<flags> modify the behaviour of the B<s> command: - -=over 8 - -=item B<g> - -The replacement is performed for all matching, non-overlapping substrings -of the pattern space. - -=item B<1>..B<9> - -Replace only the n-th matching substring of the pattern space. - -=item B<p> - -If the substitution was made, print the new value of the pattern space. - -=item B<w> I<file> - -If the substitution was made, write the new value of the pattern space -to the specified file. - -=back - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'t'}=[ 2, 'str', \&Branch, '{ goto XXX if _t() }' ]; #ok - -=item [2addr]B<t> [I<label>] - -Branch to the B<:> function with the specified I<label> if any B<s> -substitutions have been made since the most recent reading of an input line -or execution of a B<t> function. If no label is given, branch to the end of -the script. - - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'w'}=[ 2, 'str', \&Write, "{ _w( '-X-' ) }" ]; #ok - -=item [2addr]B<w> I<file> - -The contents of the pattern space are written to the I<file>. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'x'}=[ 2, '', \&Emit, '{ ($Hold, $_) = ($_, $Hold) }' ]; #ok - -=item [2addr]B<x> - -Swap the contents of the pattern space and the hold space. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'y'}=[ 2, 'tra', \&Emit, '' ]; #ok - -=item [2addr]B<y>B</>I<string1>B</>I<string2>B</> - -In the pattern space, replace all characters occurring in I<string1> by the -character at the corresponding position in I<string2>. It is possible -to use any character (other than a backslash or newline) instead of a -slash to delimit the strings. Within I<string1> and I<string2>, a -backslash followed by any character other than a newline is that literal -character, and a backslash followed by an 'n' is replaced by a newline -character. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'='}=[ 1, '', \&Emit, '{ print "$.\n" }' ]; #ok - -=item [1addr]B<=> - -Prints the current line number on the standard output. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{':'}=[ 0, 'str', \&Label, '' ]; #ok - -=item [0addr]B<:> [I<label>] - -The command specifies the position of the I<label>. It has no other effect. - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'{'}=[ 2, '', \&BeginBlock, '{' ]; #ok -$ComTab{'}'}=[ 0, '', \&EndBlock, ';}' ]; #ok -# ';' to avoid warning on empty {}-block - -=item [2addr]B<{> [I<command>] - -=item [0addr]B<}> - -These two commands begin and end a command list. The first command may -be given on the same line as the opening B<{> command. The commands -within the list are jointly selected by the address(es) given on the -B<{> command (but may still have individual addresses). - -=cut - -#-------------------------------------------------------------------------- -$ComTab{'#'}=[ 0, 'str', \&Comment, '' ]; #ok - -=item [0addr]B<#> [I<comment>] - -The entire line is ignored (treated as a comment). If, however, the first -two characters in the script are 'C<#n>', automatic printing of output is -suppressed, as if the B<-n> option were given on the command line. - -=back - -=cut - -use vars qw{ $isEOF $Hold %wFiles @Q $CondReg $doPrint }; - -my $useDEBUG = exists( $ENV{PSEDDEBUG} ); -my $useEXTBRE = $ENV{PSEDEXTBRE} || ''; -$useEXTBRE =~ s/[^<>wWyB]//g; # gawk RE's handle these - -my $doAutoPrint = 1; # automatic printing of pattern space (-n => 0) -my $doOpenWrite = 1; # open w command output files at start (-a => 0) -my $svOpenWrite = 0; # save $doOpenWrite - -# lower case $0 below as a VMSism. The VMS build procedure creates the -# s2p file traditionally in upper case on the disk. When VMS is in a -# case preserved or case sensitive mode, $0 will be returned in the exact -# case which will be on the disk, and that is not predictable at this time. - -my $doGenerate = lc($0) eq 's2p'; - -# Collected and compiled script -# -my( @Commands, %Defined, @BlockStack, %Label, $labNum, $Code, $Func ); -$Code = ''; - -################## -# Compile Time -# -# Labels -# -# Error handling -# -sub Warn($;$){ - my( $msg, $loc ) = @_; - $loc ||= ''; - $loc .= ': ' if length( $loc ); - warn( "$0: $loc$msg\n" ); -} - -$labNum = 0; -sub newLabel(){ - return 'L_'.++$labNum; -} - -# safeHere: create safe here delimiter and modify opcode and argument -# -sub safeHere($$){ - my( $codref, $argref ) = @_; - my $eod = 'EOD000'; - while( $$argref =~ /^$eod$/m ){ - $eod++; - } - $$codref =~ s/TheEnd/$eod/e; - $$argref .= "$eod\n"; -} - -# Emit: create address logic and emit command -# -sub Emit($$$$$$){ - my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_; - my $cond = ''; - if( defined( $addr1 ) ){ - if( defined( $addr2 ) ){ - $addr1 .= $addr2 =~ /^\d+$/ ? "..$addr2" : "...$addr2"; - } else { - $addr1 .= ' == $.' if $addr1 =~ /^\d+$/; - } - $cond = $negated ? "unless( $addr1 )\n" : "if( $addr1 )\n"; - } - - if( $opcode eq '' ){ - $Code .= "$cond$arg\n"; - - } elsif( $opcode =~ s/-X-/$arg/e ){ - $Code .= "$cond$opcode\n"; - - } elsif( $opcode =~ /TheEnd/ ){ - safeHere( \$opcode, \$arg ); - $Code .= "$cond$opcode$arg"; - - } else { - $Code .= "$cond$opcode\n"; - } - 0; -} - -# Write (w command, w flag): store pathname -# -sub Write($$$$$$){ - my( $addr1, $addr2, $negated, $opcode, $path, $fl ) = @_; - $wFiles{$path} = ''; - Emit( $addr1, $addr2, $negated, $opcode, $path, $fl ); -} - - -# Label (: command): label definition -# -sub Label($$$$$$){ - my( $addr1, $addr2, $negated, $opcode, $lab, $fl ) = @_; - my $rc = 0; - $lab =~ s/\s+//; - if( length( $lab ) ){ - my $h; - if( ! exists( $Label{$lab} ) ){ - $h = $Label{$lab}{name} = newLabel(); - } else { - $h = $Label{$lab}{name}; - if( exists( $Label{$lab}{defined} ) ){ - my $dl = $Label{$lab}{defined}; - Warn( "duplicate label $lab (first defined at $dl)", $fl ); - $rc = 1; - } - } - $Label{$lab}{defined} = $fl; - $Code .= "$h:;\n"; - } - $rc; -} - -# BeginBlock ({ command): push block start -# -sub BeginBlock($$$$$$){ - my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_; - push( @BlockStack, [ $fl, $addr1, $addr2, $negated ] ); - Emit( $addr1, $addr2, $negated, $opcode, $arg, $fl ); -} - -# EndBlock (} command): check proper nesting -# -sub EndBlock($$$$$$){ - my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_; - my $rc; - my $jcom = pop( @BlockStack ); - if( defined( $jcom ) ){ - $rc = Emit( $addr1, $addr2, $negated, $opcode, $arg, $fl ); - } else { - Warn( "unexpected '}'", $fl ); - $rc = 1; - } - $rc; -} - -# Branch (t, b commands): check or create label, substitute default -# -sub Branch($$$$$$){ - my( $addr1, $addr2, $negated, $opcode, $lab, $fl ) = @_; - $lab =~ s/\s+//; # no spaces at end - my $h; - if( length( $lab ) ){ - if( ! exists( $Label{$lab} ) ){ - $h = $Label{$lab}{name} = newLabel(); - } else { - $h = $Label{$lab}{name}; - } - push( @{$Label{$lab}{used}}, $fl ); - } else { - $h = 'EOS'; - } - $opcode =~ s/XXX/$h/e; - Emit( $addr1, $addr2, $negated, $opcode, '', $fl ); -} - -# Change (c command): is special due to range end watching -# -sub Change($$$$$$){ - my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_; - my $kwd = $negated ? 'unless' : 'if'; - if( defined( $addr2 ) ){ - $addr1 .= $addr2 =~ /^\d+$/ ? "..$addr2" : "...$addr2"; - if( ! $negated ){ - $addr1 = '$icnt = ('.$addr1.')'; - $opcode = 'if( $icnt =~ /E0$/ )' . $opcode; - } - } else { - $addr1 .= ' == $.' if $addr1 =~ /^\d+$/; - } - safeHere( \$opcode, \$arg ); - $Code .= "$kwd( $addr1 ){\n $opcode$arg}\n"; - 0; -} - - -# Comment (# command): A no-op. Who would've thought that! -# -sub Comment($$$$$$){ - my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_; -### $Code .= "# $arg\n"; - 0; -} - -# stripRegex from the current command. If we're in the first -# part of s///, trailing spaces have to be kept as the initial -# part of the replacement string. -# -sub stripRegex($$;$){ - my( $del, $sref, $sub ) = @_; - my $regex = $del; - print "stripRegex:$del:$$sref:\n" if $useDEBUG; - while( $$sref =~ s{^(.*?)(\\*)\Q$del\E(\s*)}{}s ){ - my $sl = $2; - $regex .= $1.$sl.$del; - if( length( $sl ) % 2 == 0 ){ - if( $sub && (length( $3 ) > 0) ){ - $$sref = $3 . $$sref; - } - return $regex; - } - $regex .= $3; - } - undef(); -} - -# stripTrans: take a <del> terminated string from y command -# honoring and cleaning up of \-escaped <del>'s -# -sub stripTrans($$){ - my( $del, $sref ) = @_; - my $t = ''; - print "stripTrans:$del:$$sref:\n" if $useDEBUG; - while( $$sref =~ s{^(.*?)(\\*)\Q$del\E}{}s ){ - my $sl = $2; - $t .= $1; - if( length( $sl ) % 2 == 0 ){ - $t .= $sl; - $t =~ s/\\\\/\\/g; - return $t; - } - chop( $sl ); - $t .= $sl.$del.$3; - } - undef(); -} - -# makey - construct Perl y/// from sed y/// -# -sub makey($$$){ - my( $fr, $to, $fl ) = @_; - my $error = 0; - - # Ensure that any '-' is up front. - # Diagnose duplicate contradicting mappings - my %tr; - for( my $i = 0; $i < length($fr); $i++ ){ - my $fc = substr($fr,$i,1); - my $tc = substr($to,$i,1); - if( exists( $tr{$fc} ) && $tr{$fc} ne $tc ){ - Warn( "ambiguous translation for character '$fc' in 'y' command", - $fl ); - $error++; - } - $tr{$fc} = $tc; - } - $fr = $to = ''; - if( exists( $tr{'-'} ) ){ - ( $fr, $to ) = ( '-', $tr{'-'} ); - delete( $tr{'-'} ); - } else { - $fr = $to = ''; - } - # might just as well sort it... - for my $fc ( sort keys( %tr ) ){ - $fr .= $fc; - $to .= $tr{$fc}; - } - # make embedded delimiters and newlines safe - $fr =~ s/([{}])/\$1/g; - $to =~ s/([{}])/\$1/g; - $fr =~ s/\n/\\n/g; - $to =~ s/\n/\\n/g; - return $error ? undef() : "{ y{$fr}{$to}; }"; -} - -###### -# makes - construct Perl s/// from sed s/// -# -sub makes($$$$$$$){ - my( $regex, $subst, $path, $global, $print, $nmatch, $fl ) = @_; - - # make embedded newlines safe - $regex =~ s/\n/\\n/g; - $subst =~ s/\n/\\n/g; - - my $code; - # n-th occurrence - # - if( length( $nmatch ) ){ - $code = <<TheEnd; -{ \$n = $nmatch; - while( --\$n && ( \$s = m ${regex}g ) ){} - \$s = ( substr( \$_, pos() ) =~ s ${regex}${subst}s ) if \$s; - \$CondReg ||= \$s; -TheEnd - } else { - $code = <<TheEnd; -{ \$s = s ${regex}${subst}s${global}; - \$CondReg ||= \$s; -TheEnd - } - if( $print ){ - $code .= ' print $_, "\n" if $s;'."\n"; - } - if( defined( $path ) ){ - $wFiles{$path} = ''; - $code .= " _w( '$path' ) if \$s;\n"; - $GenKey{'w'} = 1; - } - $code .= "}"; -} - -=head1 BASIC REGULAR EXPRESSIONS - -A I<Basic Regular Expression> (BRE), as defined in POSIX 1003.2, consists -of I<atoms>, for matching parts of a string, and I<bounds>, specifying -repetitions of a preceding atom. - -=head2 Atoms - -The possible atoms of a BRE are: B<.>, matching any single character; -B<^> and B<$>, matching the null string at the beginning or end -of a string, respectively; a I<bracket expressions>, enclosed -in B<[> and B<]> (see below); and any single character with no -other significance (matching that character). A B<\> before one -of: B<.>, B<^>, B<$>, B<[>, B<*>, B<\>, matching the character -after the backslash. A sequence of atoms enclosed in B<\(> and B<\)> -becomes an atom and establishes the target for a I<backreference>, -consisting of the substring that actually matches the enclosed atoms. -Finally, B<\> followed by one of the digits B<0> through B<9> is a -backreference. - -A B<^> that is not first, or a B<$> that is not last does not have -a special significance and need not be preceded by a backslash to -become literal. The same is true for a B<]>, that does not terminate -a bracket expression. - -An unescaped backslash cannot be last in a BRE. - -=head2 Bounds - -The BRE bounds are: B<*>, specifying 0 or more matches of the preceding -atom; B<\{>I<count>B<\}>, specifying that many repetitions; -B<\{>I<minimum>B<,\}>, giving a lower limit; and -B<\{>I<minimum>B<,>I<maximum>B<\}> finally defines a lower and upper -bound. - -A bound appearing as the first item in a BRE is taken literally. - -=head2 Bracket Expressions - -A I<bracket expression> is a list of characters, character ranges -and character classes enclosed in B<[> and B<]> and matches any -single character from the represented set of characters. - -A character range is written as two characters separated by B<-> and -represents all characters (according to the character collating sequence) -that are not less than the first and not greater than the second. -(Ranges are very collating-sequence-dependent, and portable programs -should avoid relying on them.) - -A character class is one of the class names - - alnum digit punct - alpha graph space - blank lower upper - cntrl print xdigit - -enclosed in B<[:> and B<:]> and represents the set of characters -as defined in ctype(3). - -If the first character after B<[> is B<^>, the sense of matching is -inverted. - -To include a literal 'C<^>', place it anywhere else but first. To -include a literal 'C<]>' place it first or immediately after an -initial B<^>. To include a literal 'C<->' make it the first (or -second after B<^>) or last character, or the second endpoint of -a range. - -The special bracket expression constructs C<[[:E<lt>:]]> and C<[[:E<gt>:]]> -match the null string at the beginning and end of a word respectively. -(Note that neither is identical to Perl's '\b' atom.) - -=head2 Additional Atoms - -Since some sed implementations provide additional regular expression -atoms (not defined in POSIX 1003.2), B<psed> is capable of translating -the following backslash escapes: - -=over 4 - -=item B<\E<lt>> This is the same as C<[[:E<gt>:]]>. - -=item B<\E<gt>> This is the same as C<[[:E<lt>:]]>. - -=item B<\w> This is an abbreviation for C<[[:alnum:]_]>. - -=item B<\W> This is an abbreviation for C<[^[:alnum:]_]>. - -=item B<\y> Match the empty string at a word boundary. - -=item B<\B> Match the empty string between any two either word or non-word characters. - -=back - -To enable this feature, the environment variable PSEDEXTBRE must be set -to a string containing the requested characters, e.g.: -C<PSEDEXTBRE='E<lt>E<gt>wW'>. - -=cut - -##### -# bre2p - convert BRE to Perl RE -# -sub peek(\$$){ - my( $pref, $ic ) = @_; - $ic < length($$pref)-1 ? substr( $$pref, $ic+1, 1 ) : ''; -} - -sub bre2p($$$){ - my( $del, $pat, $fl ) = @_; - my $led = $del; - $led =~ tr/{([</})]>/; - $led = '' if $led eq $del; - - $pat = substr( $pat, 1, length($pat) - 2 ); - my $res = ''; - my $bracklev = 0; - my $backref = 0; - my $parlev = 0; - for( my $ic = 0; $ic < length( $pat ); $ic++ ){ - my $c = substr( $pat, $ic, 1 ); - if( $c eq '\\' ){ - ### backslash escapes - my $nc = peek($pat,$ic); - if( $nc eq '' ){ - Warn( "'\\' cannot be last in pattern", $fl ); - return undef(); - } - $ic++; - if( $nc eq $del ){ ## \<pattern del> => \<pattern del> - $res .= "\\$del"; - - } elsif( $nc =~ /([[.*\\n])/ ){ - ## check for \-escaped magics and \n: - ## \[ \. \* \\ \n stay as they are - $res .= '\\'.$nc; - - } elsif( $nc eq '(' ){ ## \( => ( - $parlev++; - $res .= '('; - - } elsif( $nc eq ')' ){ ## \) => ) - $parlev--; - $backref++; - if( $parlev < 0 ){ - Warn( "unmatched '\\)'", $fl ); - return undef(); - } - $res .= ')'; - - } elsif( $nc eq '{' ){ ## repetition factor \{<i>[,[<j>]]\} - my $endpos = index( $pat, '\\}', $ic ); - if( $endpos < 0 ){ - Warn( "unmatched '\\{'", $fl ); - return undef(); - } - my $rep = substr( $pat, $ic+1, $endpos-($ic+1) ); - $ic = $endpos + 1; - - if( $res =~ /^\^?$/ ){ - $res .= "\\{$rep\}"; - } elsif( $rep =~ /^(\d+)(,?)(\d*)?$/ ){ - my $min = $1; - my $com = $2 || ''; - my $max = $3; - if( length( $max ) ){ - if( $max < $min ){ - Warn( "maximum less than minimum in '\\{$rep\\}'", - $fl ); - return undef(); - } - } else { - $max = ''; - } - # simplify some - if( $min == 0 && $max eq '1' ){ - $res .= '?'; - } elsif( $min == 1 && "$com$max" eq ',' ){ - $res .= '+'; - } elsif( $min == 0 && "$com$max" eq ',' ){ - $res .= '*'; - } else { - $res .= "{$min$com$max}"; - } - } else { - Warn( "invalid repeat clause '\\{$rep\\}'", $fl ); - return undef(); - } - - } elsif( $nc =~ /^[1-9]$/ ){ - ## \1 .. \9 => \1 .. \9, but check for a following digit - if( $nc > $backref ){ - Warn( "invalid backreference ($nc)", $fl ); - return undef(); - } - $res .= "\\$nc"; - if( peek($pat,$ic) =~ /[0-9]/ ){ - $res .= '(?:)'; - } - - } elsif( $useEXTBRE && ( $nc =~ /[$useEXTBRE]/ ) ){ - ## extensions - at most <>wWyB - not in POSIX - if( $nc eq '<' ){ ## \< => \b(?=\w), be precise - $res .= '\\b(?<=\\W)'; - } elsif( $nc eq '>' ){ ## \> => \b(?=\W), be precise - $res .= '\\b(?=\\W)'; - } elsif( $nc eq 'y' ){ ## \y => \b - $res .= '\\b'; - } else { ## \B, \w, \W remain the same - $res .= "\\$nc"; - } - } elsif( $nc eq $led ){ - ## \<closing bracketing-delimiter> - keep '\' - $res .= "\\$nc"; - - } else { ## \ <char> => <char> ("as if '\' were not present") - $res .= $nc; - } - - } elsif( $c eq '.' ){ ## . => . - $res .= $c; - - } elsif( $c eq '*' ){ ## * => * but \* if there's nothing preceding it - if( $res =~ /^\^?$/ ){ - $res .= '\\*'; - } elsif( substr( $res, -1, 1 ) ne '*' ){ - $res .= $c; - } - - } elsif( $c eq '[' ){ - ## parse []: [^...] [^]...] [-...] - my $add = '['; - if( peek($pat,$ic) eq '^' ){ - $ic++; - $add .= '^'; - } - my $nc = peek($pat,$ic); - if( $nc eq ']' || $nc eq '-' ){ - $add .= $nc; - $ic++; - } - # check that [ is not trailing - if( $ic >= length( $pat ) - 1 ){ - Warn( "unmatched '['", $fl ); - return undef(); - } - # look for [:...:] and x-y - my $rstr = substr( $pat, $ic+1 ); - if( $rstr =~ /^((?:\[:\(\w+|[><]\):\]|[^]-](?:-[^]])?)*)/ ){ - my $cnt = $1; - $ic += length( $cnt ); - $cnt =~ s/([\\\$])/\\$1/g; # '\', '$' are magic in Perl [] - # try some simplifications - my $red = $cnt; - if( $red =~ s/0-9// ){ - $cnt = $red.'\d'; - if( $red =~ s/A-Z// && $red =~ s/a-z// && $red =~ s/_// ){ - $cnt = $red.'\w'; - } - } - $add .= $cnt; - - # POSIX 1003.2 has this (optional) for begin/end word - $add = '\\b(?=\\W)' if $add eq '[[:<:]]'; - $add = '\\b(?<=\\W)' if $add eq '[[:>:]]'; - - } - - ## may have a trailing '-' before ']' - if( $ic < length($pat) - 1 && - substr( $pat, $ic+1 ) =~ /^(-?])/ ){ - $ic += length( $1 ); - $add .= $1; - # another simplification - $add =~ s/^\[(\^?)(\\[dw])]$/ $1 eq '^' ? uc($2) : $2 /e; - $res .= $add; - } else { - Warn( "unmatched '['", $fl ); - return undef(); - } - - } elsif( $c eq $led ){ ## unescaped <closing bracketing-delimiter> - $res .= "\\$c"; - - } elsif( $c eq ']' ){ ## unmatched ] is not magic - $res .= ']'; - - } elsif( $c =~ /[|+?{}()]/ ){ ## not magic in BRE, but in Perl: \-quote - $res .= "\\$c"; - - } elsif( $c eq '^' ){ ## not magic unless 1st, but in Perl: \-quote - $res .= length( $res ) ? '\\^' : '^'; - - } elsif( $c eq '$' ){ ## not magic unless last, but in Perl: \-quote - $res .= $ic == length( $pat ) - 1 ? '$' : '\\$'; - - } else { - $res .= $c; - } - } - - if( $parlev ){ - Warn( "unmatched '\\('", $fl ); - return undef(); - } - - # final cleanup: eliminate raw HTs - $res =~ s/\t/\\t/g; - return $del . $res . ( $led ? $led : $del ); -} - - -##### -# sub2p - convert sed substitution to Perl substitution -# -sub sub2p($$$){ - my( $del, $subst, $fl ) = @_; - my $led = $del; - $led =~ tr/{([</})]>/; - $led = '' if $led eq $del; - - $subst = substr( $subst, 1, length($subst) - 2 ); - my $res = ''; - - for( my $ic = 0; $ic < length( $subst ); $ic++ ){ - my $c = substr( $subst, $ic, 1 ); - if( $c eq '\\' ){ - ### backslash escapes - my $nc = peek($subst,$ic); - if( $nc eq '' ){ - Warn( "'\\' cannot be last in substitution", $fl ); - return undef(); - } - $ic++; - if( $nc =~ /[\\$del$led]/ ){ ## \ and delimiter - $res .= '\\' . $nc; - } elsif( $nc =~ /[1-9]/ ){ ## \1 - \9 => ${1} - ${9} - $res .= '${' . $nc . '}'; - } else { ## everything else (includes &): omit \ - $res .= $nc; - } - } elsif( $c eq '&' ){ ## & => $& - $res .= '$&'; - } elsif( $c =~ /[\$\@$led]/ ){ ## magic in Perl's substitution string - $res .= '\\' . $c; - } else { - $res .= $c; - } - } - - # final cleanup: eliminate raw HTs - $res =~ s/\t/\\t/g; - return ( $led ? $del : $led ) . $res . ( $led ? $led : $del ); -} - - -sub Parse(){ - my $error = 0; - my( $pdef, $pfil, $plin ); - for( my $icom = 0; $icom < @Commands; $icom++ ){ - my $cmd = $Commands[$icom]; - print "Parse:$cmd:\n" if $useDEBUG; - $cmd =~ s/^\s+//; - next unless length( $cmd ); - my $scom = $icom; - if( exists( $Defined{$icom} ) ){ - $pdef = $Defined{$icom}; - if( $pdef =~ /^ #(\d+)/ ){ - $pfil = 'expression #'; - $plin = $1; - } else { - $pfil = "$pdef l."; - $plin = 1; - } - } else { - $plin++; - } - my $fl = "$pfil$plin"; - - # insert command as comment in gnerated code - # - $Code .= "# $cmd\n" if $doGenerate; - - # The Address(es) - # - my( $negated, $naddr, $addr1, $addr2 ); - $naddr = 0; - if( $cmd =~ s/^(\d+)\s*// ){ - $addr1 = "$1"; $naddr++; - } elsif( $cmd =~ s/^\$\s*// ){ - $addr1 = 'eofARGV()'; $naddr++; - } elsif( $cmd =~ s{^(/)}{} || $cmd =~ s{^\\(.)}{} ){ - my $del = $1; - my $regex = stripRegex( $del, \$cmd ); - if( defined( $regex ) ){ - $addr1 = 'm '.bre2p( $del, $regex, $fl ).'s'; - $naddr++; - } else { - Warn( "malformed regex, 1st address", $fl ); - $error++; - next; - } - } - if( defined( $addr1 ) && $cmd =~ s/,\s*// ){ - if( $cmd =~ s/^(\d+)\s*// ){ - $addr2 = "$1"; $naddr++; - } elsif( $cmd =~ s/^\$\s*// ){ - $addr2 = 'eofARGV()'; $naddr++; - } elsif( $cmd =~ s{^(/)}{} || $cmd =~ s{^\\(.)}{} ){ - my $del = $1; - my $regex = stripRegex( $del, \$cmd ); - if( defined( $regex ) ){ - $addr2 = 'm '. bre2p( $del, $regex, $fl ).'s'; - $naddr++; - } else { - Warn( "malformed regex, 2nd address", $fl ); - $error++; - next; - } - } else { - Warn( "invalid address after ','", $fl ); - $error++; - next; - } - } - - # address modifier '!' - # - $negated = $cmd =~ s/^!\s*//; - if( defined( $addr1 ) ){ - print "Parse: addr1=$addr1" if $useDEBUG; - if( defined( $addr2 ) ){ - print ", addr2=$addr2 " if $useDEBUG; - # both numeric and addr1 > addr2 => eliminate addr2 - undef( $addr2 ) if $addr1 =~ /^\d+$/ && - $addr2 =~ /^\d+$/ && $addr1 > $addr2; - } - } - print 'negated' if $useDEBUG && $negated; - print " command:$cmd\n" if $useDEBUG; - - # The Command - # - if( $cmd !~ s/^([:#={}abcdDgGhHilnNpPqrstwxy])\s*// ){ - my $h = substr( $cmd, 0, 1 ); - Warn( "unknown command '$h'", $fl ); - $error++; - next; - } - my $key = $1; - - my $tabref = $ComTab{$key}; - $GenKey{$key} = 1; - if( $naddr > $tabref->[0] ){ - Warn( "excess address(es)", $fl ); - $error++; - next; - } - - my $arg = ''; - if( $tabref->[1] eq 'str' ){ - # take remainder - don't care if it is empty - $arg = $cmd; - $cmd = ''; - - } elsif( $tabref->[1] eq 'txt' ){ - # multi-line text - my $goon = $cmd =~ /(.*)\\$/; - if( length( $1 ) ){ - Warn( "extra characters after command ($cmd)", $fl ); - $error++; - } - while( $goon ){ - $icom++; - if( $icom > $#Commands ){ - Warn( "unexpected end of script", $fl ); - $error++; - last; - } - $cmd = $Commands[$icom]; - $Code .= "# $cmd\n" if $doGenerate; - $goon = $cmd =~ s/\\$//; - $cmd =~ s/\\(.)/$1/g; - $arg .= "\n" if length( $arg ); - $arg .= $cmd; - } - $arg .= "\n" if length( $arg ); - $cmd = ''; - - } elsif( $tabref->[1] eq 'sub' ){ - # s/// - if( ! length( $cmd ) ){ - Warn( "'s' command requires argument", $fl ); - $error++; - next; - } - if( $cmd =~ s{^([^\\\n])}{} ){ - my $del = $1; - my $regex = stripRegex( $del, \$cmd, "s" ); - if( ! defined( $regex ) ){ - Warn( "malformed regular expression", $fl ); - $error++; - next; - } - $regex = bre2p( $del, $regex, $fl ); - - # a trailing \ indicates embedded NL (in replacement string) - while( $cmd =~ s/(?<!\\)\\$/\n/ ){ - $icom++; - if( $icom > $#Commands ){ - Warn( "unexpected end of script", $fl ); - $error++; - last; - } - $cmd .= $Commands[$icom]; - $Code .= "# $Commands[$icom]\n" if $doGenerate; - } - - my $subst = stripRegex( $del, \$cmd ); - if( ! defined( $regex ) ){ - Warn( "malformed substitution expression", $fl ); - $error++; - next; - } - $subst = sub2p( $del, $subst, $fl ); - - # parse s/// modifier: g|p|0-9|w <file> - my( $global, $nmatch, $print, $write ) = - ( '', '', 0, undef ); - while( $cmd =~ s/^([gp0-9])// ){ - $1 eq 'g' ? ( $global = 'g' ) : - $1 eq 'p' ? ( $print = $1 ) : ( $nmatch .= $1 ); - } - $write = $1 if $cmd =~ s/w\s*(.*)$//; - ### $nmatch =~ s/^(\d)\1*$/$1/; ### may be dangerous? - if( $global && length( $nmatch ) || length( $nmatch ) > 1 ){ - Warn( "conflicting flags '$global$nmatch'", $fl ); - $error++; - next; - } - - $arg = makes( $regex, $subst, - $write, $global, $print, $nmatch, $fl ); - if( ! defined( $arg ) ){ - $error++; - next; - } - - } else { - Warn( "improper delimiter in s command", $fl ); - $error++; - next; - } - - } elsif( $tabref->[1] eq 'tra' ){ - # y/// - # a trailing \ indicates embedded newline - while( $cmd =~ s/(?<!\\)\\$/\n/ ){ - $icom++; - if( $icom > $#Commands ){ - Warn( "unexpected end of script", $fl ); - $error++; - last; - } - $cmd .= $Commands[$icom]; - $Code .= "# $Commands[$icom]\n" if $doGenerate; - } - if( ! length( $cmd ) ){ - Warn( "'y' command requires argument", $fl ); - $error++; - next; - } - my $d = substr( $cmd, 0, 1 ); $cmd = substr( $cmd, 1 ); - if( $d eq '\\' ){ - Warn( "'\\' not valid as delimiter in 'y' command", $fl ); - $error++; - next; - } - my $fr = stripTrans( $d, \$cmd ); - if( ! defined( $fr ) || ! length( $cmd ) ){ - Warn( "malformed 'y' command argument", $fl ); - $error++; - next; - } - my $to = stripTrans( $d, \$cmd ); - if( ! defined( $to ) ){ - Warn( "malformed 'y' command argument", $fl ); - $error++; - next; - } - if( length($fr) != length($to) ){ - Warn( "string lengths in 'y' command differ", $fl ); - $error++; - next; - } - if( ! defined( $arg = makey( $fr, $to, $fl ) ) ){ - $error++; - next; - } - - } - - # $cmd must be now empty - exception is { - if( $cmd !~ /^\s*$/ ){ - if( $key eq '{' ){ - # dirty hack to process command on '{' line - $Commands[$icom--] = $cmd; - } else { - Warn( "extra characters after command ($cmd)", $fl ); - $error++; - next; - } - } - - # Make Code - # - if( &{$tabref->[2]}( $addr1, $addr2, $negated, - $tabref->[3], $arg, $fl ) ){ - $error++; - } - } - - while( @BlockStack ){ - my $bl = pop( @BlockStack ); - Warn( "start of unterminated '{'", $bl ); - $error++; - } - - for my $lab ( keys( %Label ) ){ - if( ! exists( $Label{$lab}{defined} ) ){ - for my $used ( @{$Label{$lab}{used}} ){ - Warn( "undefined label '$lab'", $used ); - $error++; - } - } - } - - exit( 1 ) if $error; -} - - -############## -#### MAIN #### -############## - -sub usage(){ - print STDERR "Usage: sed [-an] command [file...]\n"; - print STDERR " [-an] [-e command] [-f script-file] [file...]\n"; -} - -################### -# Here we go again... -# -my $expr = 0; -while( @ARGV && $ARGV[0] =~ /^-(.)(.*)$/ ){ - my $opt = $1; - my $arg = $2; - shift( @ARGV ); - if( $opt eq 'e' ){ - if( length( $arg ) ){ - push( @Commands, split( "\n", $arg ) ); - } elsif( @ARGV ){ - push( @Commands, shift( @ARGV ) ); - } else { - Warn( "option -e requires an argument" ); - usage(); - exit( 1 ); - } - $expr++; - $Defined{$#Commands} = " #$expr"; - next; - } - if( $opt eq 'f' ){ - my $path; - if( length( $arg ) ){ - $path = $arg; - } elsif( @ARGV ){ - $path = shift( @ARGV ); - } else { - Warn( "option -f requires an argument" ); - usage(); - exit( 1 ); - } - my $fst = $#Commands + 1; - open( SCRIPT, "<$path" ) || die( "$0: $path: could not open ($!)\n" ); - my $cmd; - while( defined( $cmd = <SCRIPT> ) ){ - chomp( $cmd ); - push( @Commands, $cmd ); - } - close( SCRIPT ); - if( $#Commands >= $fst ){ - $Defined{$fst} = "$path"; - } - next; - } - if( $opt eq '-' && $arg eq '' ){ - last; - } - if( $opt eq 'h' || $opt eq '?' ){ - usage(); - exit( 0 ); - } - if( $opt eq 'n' ){ - $doAutoPrint = 0; - } elsif( $opt eq 'a' ){ - $doOpenWrite = 0; - } else { - Warn( "illegal option '$opt'" ); - usage(); - exit( 1 ); - } - if( length( $arg ) ){ - unshift( @ARGV, "-$arg" ); - } -} - -# A singleton command may be the 1st argument when there are no options. -# -if( @Commands == 0 ){ - if( @ARGV == 0 ){ - Warn( "no script command given" ); - usage(); - exit( 1 ); - } - push( @Commands, split( "\n", shift( @ARGV ) ) ); - $Defined{0} = ' #1'; -} - -print STDERR "Files: @ARGV\n" if $useDEBUG; - -# generate leading code -# -$Func = <<'[TheEnd]'; - -# openARGV: open 1st input file -# -sub openARGV(){ - unshift( @ARGV, '-' ) unless @ARGV; - my $file = shift( @ARGV ); - open( ARG, "<$file" ) - || die( "$0: can't open $file for reading ($!)\n" ); - $isEOF = 0; -} - -# getsARGV: Read another input line into argument (default: $_). -# Move on to next input file, and reset EOF flag $isEOF. -sub getsARGV(;\$){ - my $argref = @_ ? shift() : \$_; - while( $isEOF || ! defined( $$argref = <ARG> ) ){ - close( ARG ); - return 0 unless @ARGV; - my $file = shift( @ARGV ); - open( ARG, "<$file" ) - || die( "$0: can't open $file for reading ($!)\n" ); - $isEOF = 0; - } - 1; -} - -# eofARGV: end-of-file test -# -sub eofARGV(){ - return @ARGV == 0 && ( $isEOF = eof( ARG ) ); -} - -# makeHandle: Generates another file handle for some file (given by its path) -# to be written due to a w command or an s command's w flag. -sub makeHandle($){ - my( $path ) = @_; - my $handle; - if( ! exists( $wFiles{$path} ) || $wFiles{$path} eq '' ){ - $handle = $wFiles{$path} = gensym(); - if( $doOpenWrite ){ - if( ! open( $handle, ">$path" ) ){ - die( "$0: can't open $path for writing: ($!)\n" ); - } - } - } else { - $handle = $wFiles{$path}; - } - return $handle; -} - -# printQ: Print queued output which is either a string or a reference -# to a pathname. -sub printQ(){ - for my $q ( @Q ){ - if( ref( $q ) ){ - # flush open w files so that reading this file gets it all - if( exists( $wFiles{$$q} ) && $wFiles{$$q} ne '' ){ - open( $wFiles{$$q}, ">>$$q" ); - } - # copy file to stdout: slow, but safe - if( open( RF, "<$$q" ) ){ - while( defined( my $line = <RF> ) ){ - print $line; - } - close( RF ); - } - } else { - print $q; - } - } - undef( @Q ); -} - -[TheEnd] - -# generate the sed loop -# -$Code .= <<'[TheEnd]'; -sub openARGV(); -sub getsARGV(;\$); -sub eofARGV(); -sub printQ(); - -# Run: the sed loop reading input and applying the script -# -sub Run(){ - my( $h, $icnt, $s, $n ); - # hack (not unbreakable :-/) to avoid // matching an empty string - my $z = "\000"; $z =~ /$z/; - # Initialize. - openARGV(); - $Hold = ''; - $CondReg = 0; - $doPrint = $doAutoPrint; -CYCLE: - while( getsARGV() ){ - chomp(); - $CondReg = 0; # cleared on t -BOS:; -[TheEnd] - - # parse - avoid opening files when doing s2p - # - ( $svOpenWrite, $doOpenWrite ) = ( $doOpenWrite, $svOpenWrite ) - if $doGenerate; - Parse(); - ( $svOpenWrite, $doOpenWrite ) = ( $doOpenWrite, $svOpenWrite ) - if $doGenerate; - - # append trailing code - # - $Code .= <<'[TheEnd]'; -EOS: if( $doPrint ){ - print $_, "\n"; - } else { - $doPrint = $doAutoPrint; - } - printQ() if @Q; - } - - exit( 0 ); -} -[TheEnd] - - -# append optional functions, prepend prototypes -# -my $Proto = "# prototypes\n"; -if( $GenKey{'l'} ){ - $Proto .= "sub _l();\n"; - $Func .= <<'[TheEnd]'; -# _l: l command processing -# -sub _l(){ - my $h = $_; - my $mcpl = 70; - # transform non printing chars into escape notation - $h =~ s/\\/\\\\/g; - if( $h =~ /[^[:print:]]/ ){ - $h =~ s/\a/\\a/g; - $h =~ s/\f/\\f/g; - $h =~ s/\n/\\n/g; - $h =~ s/\t/\\t/g; - $h =~ s/\r/\\r/g; - $h =~ s/\e/\\e/g; - $h =~ s/([^[:print:]])/sprintf("\\%03o", ord($1))/ge; - } - # split into lines of length $mcpl - while( length( $h ) > $mcpl ){ - my $l = substr( $h, 0, $mcpl-1 ); - $h = substr( $h, $mcpl ); - # remove incomplete \-escape from end of line - if( $l =~ s/(?<!\\)(\\[0-7]{0,2})$// ){ - $h = $1 . $h; - } - print $l, "\\\n"; - } - print "$h\$\n"; -} - -[TheEnd] -} - -if( $GenKey{'r'} ){ - $Proto .= "sub _r(\$);\n"; - $Func .= <<'[TheEnd]'; -# _r: r command processing: Save a reference to the pathname. -# -sub _r($){ - my $path = shift(); - push( @Q, \$path ); -} - -[TheEnd] -} - -if( $GenKey{'t'} ){ - $Proto .= "sub _t();\n"; - $Func .= <<'[TheEnd]'; -# _t: t command - condition register test/reset -# -sub _t(){ - my $res = $CondReg; - $CondReg = 0; - $res; -} - -[TheEnd] -} - -if( $GenKey{'w'} ){ - $Proto .= "sub _w(\$);\n"; - $Func .= <<'[TheEnd]'; -# _w: w command and s command's w flag - write to file -# -sub _w($){ - my $path = shift(); - my $handle = $wFiles{$path}; - if( ! $doOpenWrite && ! defined( fileno( $handle ) ) ){ - open( $handle, ">$path" ) - || die( "$0: $path: cannot open ($!)\n" ); - } - print $handle $_, "\n"; -} - -[TheEnd] -} - -$Code = $Proto . $Code; - -# magic "#n" - same as -n option -# -$doAutoPrint = 0 if substr( $Commands[0], 0, 2 ) eq '#n'; - -# eval code - check for errors -# -print "Code:\n$Code$Func" if $useDEBUG; -eval $Code . $Func; -if( $@ ){ - print "Code:\n$Code$Func"; - die( "$0: internal error - generated incorrect Perl code: $@\n" ); -} - -if( $doGenerate ){ - - # write full Perl program - # - - # bang line, declarations, prototypes - print <<TheEnd; -#!$perlpath -w -eval 'exec $perlpath -S \$0 \${1+"\$@"}' - if 0; -\$0 =~ s/^.*?(\\w+)\[\\.\\w+\]*\$/\$1/; - -use strict; -use Symbol; -use vars qw{ \$isEOF \$Hold \%wFiles \@Q \$CondReg - \$doAutoPrint \$doOpenWrite \$doPrint }; -\$doAutoPrint = $doAutoPrint; -\$doOpenWrite = $doOpenWrite; -TheEnd - - my $wf = "'" . join( "', '", keys( %wFiles ) ) . "'"; - if( $wf ne "''" ){ - print <<TheEnd; -sub makeHandle(\$); -for my \$p ( $wf ){ - exit( 1 ) unless makeHandle( \$p ); -} -TheEnd - } - - print $Code; - print "Run();\n"; - print $Func; - exit( 0 ); - -} else { - - # execute: make handles (and optionally open) all w files; run! - for my $p ( keys( %wFiles ) ){ - exit( 1 ) unless makeHandle( $p ); - } - Run(); -} - - -=head1 ENVIRONMENT - -The environment variable C<PSEDEXTBRE> may be set to extend BREs. -See L<"Additional Atoms">. - -=head1 DIAGNOSTICS - -=over 4 - -=item ambiguous translation for character '%s' in 'y' command - -The indicated character appears twice, with different translations. - -=item '[' cannot be last in pattern - -A '[' in a BRE indicates the beginning of a I<bracket expression>. - -=item '\' cannot be last in pattern - -A '\' in a BRE is used to make the subsequent character literal. - -=item '\' cannot be last in substitution - -A '\' in a substitution string is used to make the subsequent character literal. - -=item conflicting flags '%s' - -In an B<s> command, either the 'g' flag and an n-th occurrence flag, or -multiple n-th occurrence flags are specified. Note that only the digits -^'1' through '9' are permitted. - -=item duplicate label %s (first defined at %s) - -=item excess address(es) - -The command has more than the permitted number of addresses. - -=item extra characters after command (%s) - -=item illegal option '%s' - -=item improper delimiter in s command - -The BRE and substitution may not be delimited with '\' or newline. - -=item invalid address after ',' - -=item invalid backreference (%s) - -The specified backreference number exceeds the number of backreferences -in the BRE. - -=item invalid repeat clause '\{%s\}' - -The repeat clause does not contain a valid integer value, or pair of -values. - -=item malformed regex, 1st address - -=item malformed regex, 2nd address - -=item malformed regular expression - -=item malformed substitution expression - -=item malformed 'y' command argument - -The first or second string of a B<y> command is syntactically incorrect. - -=item maximum less than minimum in '\{%s\}' - -=item no script command given - -There must be at least one B<-e> or one B<-f> option specifying a -script or script file. - -=item '\' not valid as delimiter in 'y' command - -=item option -e requires an argument - -=item option -f requires an argument - -=item 's' command requires argument - -=item start of unterminated '{' - -=item string lengths in 'y' command differ - -The translation table strings in a B<y> command must have equal lengths. - -=item undefined label '%s' - -=item unexpected '}' - -A B<}> command without a preceding B<{> command was encountered. - -=item unexpected end of script - -The end of the script was reached although a text line after a -B<a>, B<c> or B<i> command indicated another line. - -=item unknown command '%s' - -=item unterminated '[' - -A BRE contains an unterminated bracket expression. - -=item unterminated '\(' - -A BRE contains an unterminated backreference. - -=item '\{' without closing '\}' - -A BRE contains an unterminated bounds specification. - -=item '\)' without preceding '\(' - -=item 'y' command requires argument - -=back - -=head1 EXAMPLE - -The basic material for the preceding section was generated by running -the sed script - - #no autoprint - s/^.*Warn( *"\([^"]*\)".*$/\1/ - t process - b - :process - s/$!/%s/g - s/$[_[:alnum:]]\{1,\}/%s/g - s/\\\\/\\/g - s/^/=item / - p - -on the program's own text, and piping the output into C<sort -u>. - - -=head1 SED SCRIPT TRANSLATION - -If this program is invoked with the name F<s2p> it will act as a -sed-to-Perl translator. After option processing (all other -arguments are ignored), a Perl program is printed on standard -output, which will process the input stream (as read from all -arguments) in the way defined by the sed script and the option setting -used for the translation. - -=head1 SEE ALSO - -perl(1), re_format(7) - -=head1 BUGS - -The B<l> command will show escape characters (ESC) as 'C<\e>', but -a vertical tab (VT) in octal. - -Trailing spaces are truncated from labels in B<:>, B<t> and B<b> commands. - -The meaning of an empty regular expression ('C<//>'), as defined by B<sed>, -is "the last pattern used, at run time". This deviates from the Perl -interpretation, which will re-use the "last last successfully executed -regular expression". Since keeping track of pattern usage would create -terribly cluttered code, and differences would only appear in obscure -context (where other B<sed> implementations appear to deviate, too), -the Perl semantics was adopted. Note that common usage of this feature, -such as in C</abc/s//xyz/>, will work as expected. - -Collating elements (of bracket expressions in BREs) are not implemented. - -=head1 STANDARDS - -This B<sed> implementation conforms to the IEEE Std1003.2-1992 ("POSIX.2") -definition of B<sed>, and is compatible with the I<OpenBSD> -implementation, except where otherwise noted (see L<"BUGS">). - -=head1 AUTHOR - -This Perl implementation of I<sed> was written by Wolfgang Laun, -I<Wolfgang.Laun@alcatel.at>. - -=head1 COPYRIGHT and LICENSE - -This program is free and open software. You may use, modify, -distribute, and sell this program (and any modified variants) in any -way you wish, provided you do not restrict others from doing the same. - -=cut - -!NO!SUBS! - -close OUT or die "Can't close $file: $!"; -chmod 0755, $file or die "Can't reset permissions for $file: $!\n"; -unlink 'psed'; -print "Linking $file to psed.\n"; -if (defined $Config{d_link}) { - link $file, 'psed'; -} else { - unshift @INC, '../lib'; - require File::Copy; - File::Copy::syscopy('s2p', 'psed'); -} -exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':'; -chdir $origdir; |