Docs/Support/docbook-fixup.pl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147

#!/usr/bin/perl -w
# 2002-02-15 zak@mysql.com
# Use -w to make perl print useful warnings about the script being run

sub fix_underscore {
  $str = shift;
  $str =~ tr/_/-/;
  return $str;
};

sub strip_emph {
  $str = shift;
  $str =~ s{<emphasis>(.+?)</emphasis>}
           {$1}gs;
  return $str;
};

print STDERR "\n--Post-processing makeinfo output--\n";

# 2002-02-15 zak@mysql.com
print STDERR "Discard DTD - ORA can add the appropriate DTD for their flavour of DocBook\n";
<STDIN>;

print STDERR "Slurp! In comes the rest of the file. :)\n";
$data = join "", <STDIN>;

# 2002-02-15 zak@mysql.com
print STDERR "Add an XML processing instruction with the right character encoding\n";
$data = "<?xml version='1.0' encoding='ISO-8859-1'?>" . $data;

# 2002-02-15 zak@mysql.com
# Less than optimal - should be fixed in makeinfo
print STDERR "Put in missing <bookinfo> and <abstract>\n";
$data =~ s/<book lang="en">/<book lang="en"><bookinfo><abstract>/gs;

# 2002-02-15 zak@mysql.com
print STDERR "Convert existing ampersands to escape sequences \n";
$data =~ s/&(?!\w+;)/&amp;/gs;

# 2002-02-15 zak@mysql.com
# Need to talk to Arjen about what the <n> bits are for
print STDERR "Rework references of the notation '<n>'\n";
$data =~ s/<(\d)>/[$1]/gs;
  
# 2002-02-15 zak@mysql.com
# We might need to encode the high-bit characters to ensure proper representation
# print STDERR "Converting high-bit characters to entities\n";
# $data =~ s/([\200-\400])/&get_entity($1)>/gs;
# There is no get_entity function yet - no point writing it til we need it :)

print STDERR "Changing @@ to @...\n";
$data =~ s/@@/@/gs;

print STDERR "Changing '_' to '-' in references...\n";
$data =~ s{id=\"(.+?)\"}
          {"id=\"".&fix_underscore($1)."\""}gsex;
$data =~ s{linkend=\"(.+?)\"}
          {"linkend=\"".&fix_underscore($1)."\""}gsex;

print STDERR "Changing ULINK to SYSTEMITEM...\n";
$data =~ s{<ulink url=\"(.+?)\"></ulink>}
          {<systemitem role=\"url\">$1</systemitem>}gs;

print STDERR "Removing INFORMALFIGURE...\n";
$data =~ s{<informalfigure>(.+?)</informalfigure>}
          {}gs;

print STDERR "Adding PARA inside ENTRY...\n";
$data =~ s{<entry>(.*?)</entry>}
          {<entry><para>$1</para></entry>}gs;

print STDERR "Removing mailto: from email addresses...\n";
$data =~ s{mailto:}
          {}gs;

print STDERR "Fixing spacing problem with titles...\n";
$data =~ s{</(\w+)>(\w{2,})}
          {</$1> $2}gs;

# 2002-02-15 arjen@mysql.com
print STDERR "Adding closing / to XREF...\n";
$data =~ s{<xref (.+?)>}
          {<xref $1 />}gs;

# 2002-01-30 arjen@mysql.com
print STDERR "Removing COLSPEC...\n";
$data =~ s{\n *<colspec colwidth=\"[0-9]+\*\">}
          {}gs;

# 2002-01-31 arjen@mysql.com
print STDERR "Making first row in table THEAD...\n";
$data =~ s{([ ]*)<tbody>\n([ ]*<row>(.+?)</row>)}
          {$1<thead>\n$2\n$1</thead>\n$1<tbody>}gs;

# 2002-01-31 arjen@mysql.com
print STDERR "Removing EMPHASIS inside THEAD...\n";
$data =~ s{<thead>(.+?)</thead>}
          {"<thead>".&strip_emph($1)."</thead>"}gsex;

# 2002-01-31 arjen@mysql.com
print STDERR "Removing lf before /PARA in ENTRY...\n";
$data =~ s{(<entry><para>(.+?))\n(</para></entry>)}
          {$1$3}gs;

# 2002-01-31 arjen@mysql.com (2002-02-15 added \n stuff)
print STDERR "Removing whitespace before /PARA if not on separate line...\n";
$data =~ s{([^\n ])[ ]+</para>}
          {$1</para>}gs;

# 2002-01-31 arjen@mysql.com
print STDERR "Removing empty PARA in ENTRY...\n";
$data =~ s{<entry><para></para></entry>}
          {<entry></entry>}gs;

# 2002-01-31 arjen@mysql.com
print STDERR "Removing PARA around INDEXENTRY if no text in PARA...\n";
$data =~ s{<para>((<indexterm role=\"(cp|fn)\">(<(primary|secondary)>[^<]+?</(primary|secondary)>)+?</indexterm>)+?)[\n]*</para>[\n]*}
          {$1\n}gs;

# -----

@apx = ("Users", "MySQL Testimonials", "News",
        "GPL-license", "LGPL-license");

foreach $apx (@apx) {
  print STDERR "Removing appendix $apx...\n";
  $data =~ s{<appendix id=\"$apx\">(.+?)</appendix>}
            {}gs;

  print STDERR " ... Building list of removed nodes ...\n";
  foreach(split "\n", $&) {
    push @nodes, $2 if(/<(\w+) id=\"(.+?)\">/)
  };
};

print STDERR "Fixing references to removed nodes...\n";
foreach $node (@nodes) {
  $web = $node;
  $web =~ s/[ ]/_/;
  $web = "http://www.mysql.com/doc/" .
         (join "/", (split //, $web)[0..1])."/$web.html";
  print STDERR "$node -> $web\n";
  $data =~ s{<(\w+) linkend=\"$node\">}
            {$web}gs;
};

print STDOUT $data;