diff options
Diffstat (limited to 'Docs/Support')
-rwxr-xr-x | Docs/Support/docbook-fixup.pl | 247 | ||||
-rwxr-xr-x | Docs/Support/docbook-split | 98 |
2 files changed, 163 insertions, 182 deletions
diff --git a/Docs/Support/docbook-fixup.pl b/Docs/Support/docbook-fixup.pl index 3f329423279..e8477f58cdb 100755 --- a/Docs/Support/docbook-fixup.pl +++ b/Docs/Support/docbook-fixup.pl @@ -1,158 +1,165 @@ #!/usr/bin/perl -w -# 2002-02-15 zak@mysql.com -# Use -w to make perl print useful warnings about the script being run -sub fix_underscore { - $str = shift; - $str =~ tr/_/-/; - return $str; -}; +# Fix the output of `makeinfo --docbook` version 4.0c +# Convert the broken docbook output to well-formed XML that conforms to the O'Reilly idiom +# See code for detailed comments +# Authors: Arjen Lentz and Zak Greant -sub strip_emph { - $str = shift; - $str =~ s{<emphasis>(.+?)</emphasis>} - {$1}gs; - return $str; -}; +use strict; -print STDERR "\n--Post-processing makeinfo output--\n"; +my $data = ''; +my @apx = (); +my $apx = ''; +my @nodes = (); +my $nodes = ''; -# 2002-02-15 zak@mysql.com -print STDERR "Discard DTD - ORA can add the appropriate DTD for their flavour of DocBook\n"; -<STDIN>; +msg ("\n-- Post-processing `makeinfo --docbook` output --"); +msg ("** Written to work with makeinfo version 4.0c **\n"); -print STDERR "Slurp! In comes the rest of the file. :)\n"; -$data = join "", <STDIN>; +msg ("Discarding DTD - not required by subsequent scripts"); +# <> is a magic filehandle - either reading lines from stdin or from file(s) specified on the command line +<>; -# 2002-02-15 zak@mysql.com -print STDERR "Add an XML processing instruction with the right character encoding\n"; -$data = "<?xml version='1.0' encoding='ISO-8859-1'?>" . $data; +msg ("Create an XML PI with ISO-8859-1 character encoding"); +$data = "<?xml version='1.0' encoding='ISO-8859-1'?>"; -# 2002-02-15 zak@mysql.com -# Less than optimal - should be fixed in makeinfo -print STDERR "Put in missing <bookinfo> and <abstract>\n"; -$data =~ s/<book lang="en">/<book lang="en"><bookinfo><abstract>/gs; +msg ("Get the rest of the data"); +$data = $data . join "", <>; -# 2002-02-15 zak@mysql.com -print STDERR "Convert existing ampersands to escape sequences \n"; -$data =~ s/&(?!\w+;)/&/gs; +msg ("Add missing <bookinfo> and <abstract> opening tags"); +# Note the absence of the g (global) pattern modified. This situation can only happen once. +# ...as soon as we find the first instance, we can stop looking. +$data =~ s/<book lang="en">/<book lang="en"><bookinfo><abstract>/; -# 2002-02-15 zak@mysql.com -# Need to talk to Arjen about what the <n> bits are for -print STDERR "Rework references of the notation '<n>'\n"; -$data =~ s/<(\d)>/[$1]/gs; +msg ("Removing mailto: from email addresses..."); +$data =~ s/mailto://g; + +msg ("Removing INFORMALFIGURE..."); +$data =~ s{<informalfigure>.+?</informalfigure>} + {}gs; + +msg ("Convert ampersands to XML escape sequences "); +$data =~ s/&(?!\w+;)/&/g; -# 2002-02-15 zak@mysql.com -# We might need to encode the high-bit characters to ensure proper representation -# print STDERR "Converting high-bit characters to entities\n"; -# $data =~ s/([\200-\400])/&get_entity($1)>/gs; -# There is no get_entity function yet - no point writing it til we need it :) +msg ("Changing @@ to @..."); +$data =~ s/@@/@/g; -print STDERR "Changing @@ to @...\n"; -$data =~ s/@@/@/gs; +msg ("Rework references of the notation '<n>'"); +# Need to talk to Arjen about what the <n> bits are for +$data =~ s/<(\d)>/[$1]/g; -print STDERR "Changing '_' to '-' in references...\n"; -$data =~ s{id=\"(.+?)\"} - {"id=\"".&fix_underscore($1)."\""}gsex; -$data =~ s{linkend=\"(.+?)\"} - {"linkend=\"".&fix_underscore($1)."\""}gsex; +msg ("Changing '_' to '-' in references..."); +$data =~ s{((?:id|linkend)=\".+?\")} + {&underscore2hyphen($1)}gex; -print STDERR "Changing ULINK to SYSTEMITEM...\n"; -$data =~ s{<ulink url=\"(.+?)\"></ulink>} +msg ("Changing ULINK to SYSTEMITEM..."); +$data =~ s{<ulink url=\"(.+?)\">\s*</ulink>} {<systemitem role=\"url\">$1</systemitem>}gs; -print STDERR "Removing INFORMALFIGURE...\n"; -$data =~ s{<informalfigure>(.+?)</informalfigure>} - {}gs; - -print STDERR "Adding PARA inside ENTRY...\n"; +msg ("Adding PARA inside ENTRY..."); $data =~ s{<entry>(.*?)</entry>} {<entry><para>$1</para></entry>}gs; -print STDERR "Removing mailto: from email addresses...\n"; -$data =~ s{mailto:} - {}gs; - -print STDERR "Fixing spacing problem with titles...\n"; -$data =~ s{</(\w+)>(\w{2,})} - {</$1> $2}gs; +msg ("Fixing spacing problem with titles..."); +$data =~ s{(</\w+>)(\w{2,})} + {$1 $2}gs; -# 2002-02-15 arjen@mysql.com -print STDERR "Adding closing / to XREF...\n"; -$data =~ s{<xref (.+?)>} - {<xref $1 />}gs; +msg ("Adding closing / to XREF and COLSPEC tags..."); +$data =~ s{<(xref|colspec) (.+?)>} + {<$1 $2 />}gs; -# 2002-02-22 arjen@mysql.com -print STDERR "Adding \"See \" to XREFs that used to be \@xref...\n"; -$data =~ s{([\.\'\!\)])[\n ]*<xref } +# Probably need to strip these +msg ('Adding "See " to XREFs that used to be @xref...'); +$data =~ s{([.'!)])\s*<xref } {$1 See <xref }gs; -# 2002-02-22 arjen@mysql.com -print STDERR "Adding \"see \" to (XREFs) that used to be (\@pxref)...\n"; -$data =~ s{(\(|[[,;])([\n]*[ ]*)<xref } +msg ('Adding "see " to (XREFs) that used to be (@pxref)...'); +$data =~ s{([([,;])(\s*)<xref } {$1$2see <xref }gs; -# 2002-01-30 arjen@mysql.com -print STDERR "Removing COLSPEC...\n"; -$data =~ s{\n *<colspec colwidth=\"[0-9]+\*\">} - {}gs; - -# 2002-01-31 arjen@mysql.com -print STDERR "Making first row in table THEAD...\n"; -$data =~ s{([ ]*)<tbody>\n([ ]*<row>(.+?)</row>)} - {$1<thead>\n$2\n$1</thead>\n$1<tbody>}gs; +msg ("Making first row in table THEAD..."); +$data =~ s{( *)<tbody>(\s*<row>.+?</row>)} + {$1<thead>$2\n$1</thead>\n$1<tbody>}gs; -# 2002-01-31 arjen@mysql.com -print STDERR "Removing EMPHASIS inside THEAD...\n"; +msg ("Removing EMPHASIS inside THEAD..."); $data =~ s{<thead>(.+?)</thead>} - {"<thead>".&strip_emph($1)."</thead>"}gsex; + {"<thead>".&strip_tag($1, 'emphasis')."</thead>"}gsex; -# 2002-01-31 arjen@mysql.com -print STDERR "Removing lf before /PARA in ENTRY...\n"; -$data =~ s{(<entry><para>(.+?))\n(</para></entry>)} - {$1$3}gs; - -# 2002-01-31 arjen@mysql.com (2002-02-15 added \n stuff) -print STDERR "Removing whitespace before /PARA if not on separate line...\n"; -$data =~ s{([^\n ])[ ]+</para>} - {$1</para>}gs; +msg ("Removing empty PARA..."); +$data =~ s{<para>\s*</para>} + {}gs; -# 2002-01-31 arjen@mysql.com -print STDERR "Removing empty PARA in ENTRY...\n"; -$data =~ s{<entry><para></para></entry>} - {<entry></entry>}gs; +msg ("Removing lf before /PARA in ENTRY..."); +$data =~ s{\n(</para></entry>)} + {$1}gs; -# 2002-01-31 arjen@mysql.com -print STDERR "Removing PARA around INDEXENTRY if no text in PARA...\n"; -$data =~ s{<para>((<indexterm role=\"(cp|fn)\">(<(primary|secondary)>[^<]+?</(primary|secondary)>)+?</indexterm>)+?)[\n]*</para>[\n]*} - {$1\n}gs; +msg ("Removing whitespace before /PARA if not on separate line..."); +$data =~ s{(\S+)[\t ]+</para>} + {$1</para>}g; -# ----- +msg ("Removing PARA around INDEXTERM if no text in PARA..."); +$data =~ s{<para>((?:<indexterm role=\"(?:cp|fn)\">(?:<(primary|secondary)>[^>]+</\2>)+?</indexterm>)+?)\s*</para>} + {$1}gs; -@apx = ("Users", "MySQL Testimonials", "News", - "GPL-license", "LGPL-license"); +@apx = ("Users", "MySQL Testimonials", "News", "GPL-license", "LGPL-license"); foreach $apx (@apx) { - print STDERR "Removing appendix $apx...\n"; - $data =~ s{<appendix id=\"$apx\">(.+?)</appendix>} - {}gs; - - print STDERR " ... Building list of removed nodes ...\n"; - foreach(split "\n", $&) { - push @nodes, $2 if(/<(\w+) id=\"(.+?)\">/) - }; -}; + msg ("Removing appendix $apx..."); + $data =~ s{<appendix id=\"$apx\">(.+?)</appendix>} + {}gs; + + # Skip to next appendix regex if the regex did not match anything + next unless (defined $&); + + msg ("...Building list of removed nodes..."); + + # Split the last bracketed regex match into an array + # Extract the node names from the tags and push them into an array + foreach (split "\n", $&) { + push @nodes, $1 if /<\w+ id=\"(.+?)\">/ + } +} # 2002-02-22 arjen@mysql.com (added fix " /" to end of regex, to make it match) -print STDERR "Fixing references to removed nodes...\n"; -foreach $node (@nodes) { - $web = $node; - $web =~ s/[ ]/_/; - $web = "http://www.mysql.com/doc/" . - (join "/", (split //, $web)[0..1])."/$web.html"; - print STDERR "$node -> $web\n"; - $data =~ s{<(\w+) linkend=\"$node\" />} - {$web}gs; -}; +msg ("Fixing references to removed nodes..."); +# Merge the list of node names into a set of regex alternations +$nodes = join "|", @nodes; + +# Find all references to removed nodes and convert them to absolute URLs +$data =~ s{<\w+ linkend="($nodes)" />} + {&xref2link($1)}ges; print STDOUT $data; +exit; + +# +# Definitions for helper sub-routines +# + +sub msg { + print STDERR shift, "\n"; +} + +sub strip_tag($$) { + (my $str, my $tag) = @_; + $str =~ s{<$tag>(.+?)</$tag>}{$1}gs; + return $str; +} + +sub underscore2hyphen($) { + my $str = shift; + $str =~ tr/_/-/; + return $str; +} + +sub xref2link { + my $ref = shift; + $ref =~ tr/ /_/; + $ref =~ s{^((.)(.).+)$}{$2/$3/$1.html}; + return "http://www.mysql.com/doc/" . $ref; +} + +# We might need to encode the high-bit characters to ensure proper representation +# msg ("Converting high-bit characters to entities"); +# $data =~ s/([\200-\400])/&get_entity($1)>/gs; +# There is no get_entity function yet - no point writing it til we need it :) diff --git a/Docs/Support/docbook-split b/Docs/Support/docbook-split index 62fcc866e04..b116769f86e 100755 --- a/Docs/Support/docbook-split +++ b/Docs/Support/docbook-split @@ -1,91 +1,65 @@ -#! /usr/local/bin/perl +#! /usr/bin/perl -w # O'Reilly's Perl script to chop mysql.xml into separate ch/apps/index files. # The indexes are actually not used, they're created straight from the xrefs. - -use strict; - # Breaks the MySQL reference manual into chapters, appendices, and indexes. -my $input_file; -my $directory; -my $chap_num; -my $app_letter; -my $start_text; -my $line; -my $input_file; -my $output_name; - -$input_file = "mysql.xml"; -$directory="chaps_apps_index"; -$chap_num=1; # Start chapter numbers at one (there is no preface) -$app_letter="a"; # Start appendix letters at "a" -$start_text=""; -$line=""; - -open (INPUT_FILE, '<' . $input_file) or die "Cannot open $input_file"; - -if (-d $directory) { - my $unlinked = unlink <$directory/*>; - printf(Removed "%d files\n", $unlinked); -} -else { - mkdir $directory or die "Cannot make $directory subdirectory"; -} +use strict; -while (1) { +my $app_letter = "a"; # Start appendix letters at "a" +my $chap_num = 1; # Start chapter numbers at one (there is no preface) +my $directory = "chaps_apps_index"; +my $ext = ".xml"; +my $line = ""; +my $output_name = ""; +my $start_text = ""; - # Terminating statement for loop. - exit if not defined $line; +mkdir $directory unless -d $directory; - if ($line =~ /(?:.*)(<chapter.*)/i ) { +while (defined $line) { + if ($line =~ /(<chapter.+)/i ) { $start_text = $1; - $output_name = &make_chapter_name($chap_num); - $chap_num++; + $output_name = sprintf("ch%02d%s", $chap_num, $ext); + ++$chap_num; &process_file("chapter"); } - elsif ($line =~ /(?:.*)(<appendix.*)/i ) { + elsif ($line =~ /(<appendix.+)/i ) { $start_text = $1 ; - $output_name = &make_appendix_name($app_letter); - $app_letter++; + $output_name = "app$app_letter$ext"; + ++$app_letter; &process_file("appendix"); } - elsif ($line =~ /(?:.*)(<index\s+id=")(.*?)(">.*)/i ) { + elsif ($line =~ /(<index\s+id=")(.*?)(">.*)/i ) { $start_text = $1 . $2 . $3; - $output_name = lc($2) . ".xml"; + $output_name = lc($2) . $ext; &process_file("index"); } else { - # Automatically skips junk in between chapters, appendices, - # and indexes. - $line = <INPUT_FILE>; + # Skip junk in between chapters, appendices and indexes. + $line = <>; } } -sub make_chapter_name { - my $num = shift; - my $name = "ch" . sprintf("%02d", $num) . ".xml"; - return $name; -} +sub process_file { + my $marker = shift; + my $path = "$directory/$output_name"; -sub make_appendix_name { - my $letter = shift; - my $name = "app" . sprintf("%s", $letter) . ".xml"; - return $name; -} + open (OUTPUT_FILE, ">$path") or die "Cannot open $path"; -sub process_file { - my $marker=shift; - open (OUTPUT_FILE, '>' . $directory . "/" . $output_name) or - die "Cannot open $output_name"; + print STDERR "Creating $path\n"; + + # Print out XML PI + print OUTPUT_FILE "<?xml version='1.0' encoding='ISO-8859-1'?>\n"; + # Print whatever happened to appear at the end of the previous chapter. - print OUTPUT_FILE $start_text . "\n" if $start_text; - while (1) { - $line = <INPUT_FILE>; - exit if not defined $line; + print OUTPUT_FILE "$start_text\n" if $start_text; + + while (defined $line) { + $line = <>; + # Note: Anything after the terminating marker is lost, just like # lines in between chapters. if ($line =~ /(.*<\/\s*$marker\s*>)/i ) { - print OUTPUT_FILE $1 . "\n" if $1; + print OUTPUT_FILE "$1\n" if $1; close OUTPUT_FILE; return; } |