diff options
Diffstat (limited to 'Docs/Support/docbook-fixup.pl')
-rwxr-xr-x | Docs/Support/docbook-fixup.pl | 200 |
1 files changed, 0 insertions, 200 deletions
diff --git a/Docs/Support/docbook-fixup.pl b/Docs/Support/docbook-fixup.pl deleted file mode 100755 index 48ab085ad3e..00000000000 --- a/Docs/Support/docbook-fixup.pl +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/perl -w - -# Fix the output of `makeinfo --docbook` version 4.0c -# Convert the broken docbook output to well-formed XML that conforms to the O'Reilly idiom -# See code for detailed comments -# Authors: Arjen Lentz and Zak Greant (original code by Jeremy Cole) - -use strict; - -my $data = ''; -my @apx = (); -my $apx = ''; -my @nodes = (); -my $nodes = ''; - -msg ("-- Post-processing `makeinfo --docbook` output --"); -msg ("** Written to work with makeinfo version 4.0c **\n"); - -msg ("Discarding DTD - not required by subsequent scripts"); -# <> is a magic filehandle - either reading lines from stdin or from file(s) specified on the command line -<>; - -msg ("Create an XML PI with ISO-8859-1 character encoding"); -$data = "<?xml version='1.0' encoding='ISO-8859-1'?>"; - -msg ("Get the rest of the data"); -$data = $data . join "", <>; - -msg ("Add missing <bookinfo> and <abstract> opening tags"); -# Note the absence of the g (global) pattern modified. This situation can only happen once. -# ...as soon as we find the first instance, we can stop looking. -$data =~ s/<book lang="en">/<book lang="en"><bookinfo><abstract>/; - - -# arjen 2002-05-01 -msg ("Processing docbook-prefix special strings"); -$data =~ s/FIXUPmdashFIXUP/\&mdash\;/g; - -$data =~ s/FIXUPdoubledashFIXUP/--/g; - -$data =~ s/FIXUPstrongFIXUP/<emphasis\ role\=\"bold\">/g; -$data =~ s/FIXUPendstrongFIXUP/<\/emphasis>/g; - -$data =~ s/FIXUPemphFIXUP/<emphasis>/g; -$data =~ s/FIXUPendemphFIXUP/<\/emphasis>/g; - -$data =~ s/FIXUPfileFIXUP/<filename>/g; -$data =~ s/FIXUPendfileFIXUP/<\/filename>/g; - -$data =~ s/FIXUPsampFIXUP/<literal>/g; -$data =~ s/FIXUPendsampFIXUP/<\/literal>/g; - - -msg ("Removing mailto: from email addresses..."); -$data =~ s/mailto://g; - -msg ("Removing INFORMALFIGURE..."); -$data =~ s{<informalfigure>.+?</informalfigure>} - {}gs; - -msg ("Convert ampersand to XML escape sequence..."); -$data =~ s/&(?!\w+;)/&/g; - -# arjen 2002-05-01 -msg ("Changing (TM) to XML escape sequence..."); -$data =~ s/MySQL \(TM\)/MySQL™/g; -$data =~ s{<command>TM</command>} - {™}g; - -# arjen 2002-05-01 -msg ("Changing ' -- ' to XML escape sequence..."); -$data =~ s/ -- /—/g; - -msg ("Changing @@ to @..."); -$data =~ s/@@/@/g; - -msg ("Rework references of the notation '<n>'"); -# Need to talk to Arjen about what the <n> bits are for -$data =~ s/<(\d)>/[$1]/g; - -msg ("Changing '_' to '-' in references..."); -$data =~ s{((?:id|linkend)=\".+?\")} - {&underscore2hyphen($1)}gex; - -msg ("Changing ULINK to SYSTEMITEM..."); -$data =~ s{<ulink url=\"(.+?)\">\s*</ulink>} - {<systemitem role=\"url\">$1</systemitem>}gs; - -msg ("Adding PARA inside ENTRY..."); -$data =~ s{<entry>(.*?)</entry>} - {<entry><para>$1</para></entry>}gs; - -msg ("Fixing spacing problem with titles..."); -$data =~ s{(</\w+>)(\w{2,})} - {$1 $2}gs; - -msg ("Adding closing / to XREF and COLSPEC tags..."); -$data =~ s{<(xref|colspec) (.+?)>} - {<$1 $2 />}gs; - -# arjen 2002-04-26 -msg ("Removing separate target titles from LINKs and make them XREFs..."); -$data =~ s{<link (linkend=.+?)>.+?</link>} - {<xref $1 />}gs; - -# Probably need to strip these -msg ('Adding "See " to XREFs that used to be @xref...'); -$data =~ s{([.'!)])\s*<xref } - {$1 See <xref }gs; - -msg ('Adding "see " to (XREFs) that used to be (@pxref)...'); -$data =~ s{([([,;])(\s*)<xref } - {$1$2see <xref }gs; - -msg ("Making first row in table THEAD..."); -$data =~ s{( *)<tbody>(\s*<row>.+?</row>)} - {$1<thead>$2\n$1</thead>\n$1<tbody>}gs; - -msg ("Removing EMPHASIS inside THEAD..."); -$data =~ s{<thead>(.+?)</thead>} - {"<thead>".&strip_tag($1, 'emphasis')."</thead>"}gsex; - -msg ("Removing empty PARA..."); -$data =~ s{<para>\s*</para>} - {}gs; - -msg ("Removing lf before /PARA in ENTRY..."); -$data =~ s{\n(</para></entry>)} - {$1}gs; - -msg ("Removing whitespace before /PARA if not on separate line..."); -$data =~ s{(\S+)[\t ]+</para>} - {$1</para>}g; - -msg ("Removing PARA around INDEXTERM if no text in PARA..."); -$data =~ s{<para>((?:<indexterm role=\"[^"]+\">(?:<(primary|secondary)>[^>]+</\2>)+?</indexterm>)+?)\s*</para>} - {$1}gs; - -@apx = ("Users", "MySQL Testimonials", "News", "GPL-license", "LGPL-license"); - -foreach $apx (@apx) { - msg ("Removing appendix $apx..."); - $data =~ s{<appendix id=\"$apx\">(.+?)</appendix>} - {}gs; - - # Skip to next appendix regex if the regex did not match anything - next unless (defined $&); - - msg ("...Building list of removed nodes..."); - - # Split the last bracketed regex match into an array - # Extract the node names from the tags and push them into an array - foreach (split "\n", $&) { - push @nodes, $1 if /<\w+ id=\"(.+?)\">/ - } -} - -# 2002-02-22 arjen@mysql.com (added fix " /" to end of regex, to make it match) -msg ("Fixing references to removed nodes..."); -# Merge the list of node names into a set of regex alternations -$nodes = join "|", @nodes; - -# Find all references to removed nodes and convert them to absolute URLs -$data =~ s{<\w+ linkend="($nodes)" />} - {&xref2link($1)}ges; - -print STDOUT $data; -exit; - -# -# Definitions for helper sub-routines -# - -sub msg { - print STDERR "docbook-fixup:", shift, "\n"; -} - -sub strip_tag($$) { - (my $str, my $tag) = @_; - $str =~ s{<$tag>(.+?)</$tag>}{$1}gs; - return $str; -} - -sub underscore2hyphen($) { - my $str = shift; - $str =~ tr/_/-/; - return $str; -} - -sub xref2link { - my $ref = shift; - $ref =~ tr/ /_/; - $ref =~ s{^((.)(.).+)$}{$2/$3/$1.html}; - return "http://www.mysql.com/doc/" . $ref; -} - -# We might need to encode the high-bit characters to ensure proper representation -# msg ("Converting high-bit characters to entities"); -# $data =~ s/([\200-\400])/&get_entity($1)>/gs; -# There is no get_entity function yet - no point writing it til we need it :) |