#! /usr/bin/perl -w

# Provenance (from the patch header this script was extracted from):
#   Commit:  fd8438eb9b6bec69a456b69a7dece77aadc06a36
#   Author:  ph10
#   Date:    Tue, 23 Sep 2014 11:35:51 +0000
#   Subject: Documentation scripts
#   git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@79
#               6239d852-aaf2-0410-a92c-79f79f948069
#   File:    CleanTxt (new file, mode 100755)

# Script to take the output of nroff -man and remove all the backspacing and
# the page footers and the screen commands etc so that it is more usefully
# readable online. In fact, in the latest nroff, intermediate footers don't
# seem to be generated any more.
#
# Input on STDIN; output to STDOUT.

use strict;
use warnings;

# Return a copy of one input line with terminal controls removed.
sub strip_controls {
    my ($line) = @_;
    $line =~ s/\x1b\[\d+m//g;    # Remove screen controls "ESC [ number m"
    $line =~ s/.\x8//g;          # Remove "char, backspace"
    return $line;
}

# Filter nroff output read from the filehandle $in and write the cleaned text
# to the filehandle $out. Returns nothing.
sub clean_txt {
    my ($in, $out) = @_;

    my $blankcount  = 0;    # Blank lines seen since the last non-blank line
    my $lastwascut  = 0;    # True directly after a footer chunk was removed
    my $firstheader = 1;    # True until the first header line has been printed
    my $lastprinted;        # Line used for the indentation comparison below

    while (defined(my $line = <$in>)) {
        $line = strip_controls($line);

        # Handle header lines. Retain only the first one we encounter, but
        # remove the line that follows (expected to be blank). Any others
        # (e.g. at end of document) and the following line are dropped.

        if ($line =~ /^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/) {
            if ($firstheader) {
                $firstheader = 0;
                print {$out} $line;
                $lastprinted = $line;
                $lastwascut  = 0;
            }
            my $follower = <$in>;    # Read and discard the line after a header
            next;
        }

        # Count runs of empty lines

        if ($line =~ /^\s*$/) {
            $blankcount++;
            $lastwascut = 0;
            next;
        }

        # If a chunk of lines has been cut out (page footer) and the next line
        # has a different indentation, put back one blank line.

        if ($lastwascut && $blankcount < 1 && defined($lastprinted)) {
            my ($prev_indent) = $lastprinted =~ /^(\s*)/;
            my ($this_indent) = $line =~ /^(\s*)/;
            $blankcount++ if $prev_indent ne $this_indent;
        }

        # We get here only when we have a non-blank line in hand. If it was
        # preceded by 3 or more blank lines, read the next 3 lines and see if
        # they are blank. If so, remove all 7 lines, and remember that we have
        # just done a cut.

        if ($blankcount >= 3) {
            my @lookahead;
            for my $i (0 .. 2) {
                my $peek = <$in>;
                $peek = "" if !defined $peek;    # Treat end of input as empty
                push @lookahead, strip_controls($peek);
            }

            # Cut out chunks of the form <3 blanks><non-blank><3 blanks>

            if ($lookahead[0] =~ /^\s*$/ &&
                $lookahead[1] =~ /^\s*$/ &&
                $lookahead[2] =~ /^\s*$/) {
                $blankcount -= 3;
                $lastwascut = 1;
            }

            # Otherwise output the saved blanks, the current, and the next
            # three lines.

            else {
                print {$out} "\n" x $blankcount;
                print {$out} $line;
                for my $peek (@lookahead) {
                    # Second overstrike pass, kept from the original: it can
                    # still remove pairs left behind by doubled backspaces.
                    $peek =~ s/.\x8//g;
                    print {$out} $peek;
                }

                # NOTE(review): this remembers the current line, not the last
                # lookahead line that was actually printed. The original
                # assigned it inside the loop above with the same effect;
                # confirm whether the last lookahead line was intended.
                $lastprinted = $line;
                $lastwascut  = 0;
                $blankcount  = 0;
            }
        }

        # This non-blank line is not preceded by 3 or more blank lines. Output
        # any blanks there are, and the line. Remember it. Force two blank
        # lines before headings.

        else {
            $blankcount = 2
                if $line =~ /^\S/
                && $line !~ /^Last updated/
                && $line !~ /^Copyright/
                && defined($lastprinted);
            print {$out} "\n" x $blankcount;
            print {$out} $line;
            $lastprinted = $line;
            $lastwascut  = 0;
            $blankcount  = 0;
        }
    }

    return;
}

# Run as a filter only when executed directly, not when loaded by other code.
clean_txt(\*STDIN, \*STDOUT) unless caller;

1;

# End