summaryrefslogtreecommitdiff
path: root/util/txixml2texi.pl
diff options
context:
space:
mode:
Diffstat (limited to 'util/txixml2texi.pl')
-rwxr-xr-xutil/txixml2texi.pl452
1 files changed, 452 insertions, 0 deletions
diff --git a/util/txixml2texi.pl b/util/txixml2texi.pl
new file mode 100755
index 0000000..fa3413c
--- /dev/null
+++ b/util/txixml2texi.pl
@@ -0,0 +1,452 @@
+#! /usr/bin/env perl
+#
+# texixml2texi -- convert Texinfo XML to Texinfo code
+#
+# Copyright 2012 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Original author: Patrice Dumas <pertusus@free.fr>
+
+use strict;
+use Getopt::Long qw(GetOptions);
+# for dirname.
+use File::Basename;
+use File::Spec;
+
+Getopt::Long::Configure("gnu_getopt");
+
+BEGIN
+{
+ # emulate -w
+ $^W = 1;
+ my ($real_command_name, $command_directory, $command_suffix)
+ = fileparse($0, '.pl');
+
+ my $datadir = '@datadir@';
+ my $package = '@PACKAGE@';
+ my $updir = File::Spec->updir();
+
+ my $texinfolibdir;
+ my $lib_dir;
+
+ # in-source run
+ if (($command_suffix eq '.pl' and !(defined($ENV{'TEXINFO_DEV_SOURCE'})
+ and $ENV{'TEXINFO_DEV_SOURCE'} eq 0)) or $ENV{'TEXINFO_DEV_SOURCE'}) {
+ my $srcdir = defined $ENV{'srcdir'} ? $ENV{'srcdir'} : $command_directory;
+ $texinfolibdir = File::Spec->catdir($srcdir, $updir, 'tp');
+ $lib_dir = File::Spec->catdir($texinfolibdir, 'maintain');
+ unshift @INC, $texinfolibdir;
+ } elsif ($datadir ne '@' .'datadir@' and $package ne '@' . 'PACKAGE@'
+ and $datadir ne '') {
+ $texinfolibdir = File::Spec->catdir($datadir, $package);
+ # try to make package relocatable, will only work if standard relative paths
+ # are used
+ if (! -f File::Spec->catfile($texinfolibdir, 'Texinfo', 'Parser.pm')
+ and -f File::Spec->catfile($command_directory, $updir, 'share',
+ 'texinfo', 'Texinfo', 'Parser.pm')) {
+ $texinfolibdir = File::Spec->catdir($command_directory, $updir,
+ 'share', 'texinfo');
+ }
+ $lib_dir = $texinfolibdir;
+ unshift @INC, $texinfolibdir;
+ }
+
+ # '@USE_EXTERNAL_LIBINTL @ and similar are substituted in the
+ # makefile using values from configure
+ if (defined($texinfolibdir)) {
+ if ('@USE_EXTERNAL_LIBINTL@' ne 'yes') {
+ unshift @INC, (File::Spec->catdir($lib_dir, 'lib', 'libintl-perl', 'lib'));
+ }
+ if ('@USE_EXTERNAL_EASTASIANWIDTH@' ne 'yes') {
+ unshift @INC, (File::Spec->catdir($lib_dir, 'lib', 'Unicode-EastAsianWidth', 'lib'));
+ }
+ if ('@USE_EXTERNAL_UNIDECODE@' ne 'yes') {
+ unshift @INC, (File::Spec->catdir($lib_dir, 'lib', 'Text-Unidecode', 'lib'));
+ }
+ }
+}
+
+use XML::LibXML::Reader;
+
+# gather information on Texinfo XML elements
+use Texinfo::Common;
+use Texinfo::Convert::TexinfoXML;
+
+my $debug = 0;
+my $result_options = Getopt::Long::GetOptions (
+ 'debug|d' => \$debug,
+);
+
+sub command_with_braces($)
+{
+ my $command = shift;
+ if ($command =~ /^[a-z]/i) {
+ return "\@".$command.'{}';
+ } else {
+ return "\@".$command;
+ }
+}
+
+my %ignored_elements = (
+ 'prepend' => 1,
+ 'formalarg' => 1,
+ # not ignored everytime
+ 'indexterm' => 1,
+);
+
+my %elements_end_attributes = (
+ 'accent' => 1,
+ 'menunode' => 1,
+ 'menutitle' => 1,
+);
+
+my %element_at_commands;
+my %entity_texts = (
+ 'textldquo' => '``',
+ 'textrdquo' => "''",
+ 'textmdash' => '---',
+ 'textndash' => '--',
+ 'textrsquo' => "'",
+ 'textlsquo' => '`',
+ 'formfeed' => "\f",
+ # this is not used in pratice, as attrformfeed appears in an
+ # attribute and thus is already expanded to text.
+ 'attrformfeed' => "\f",
+);
+
+foreach my $command (keys(%Texinfo::Convert::TexinfoXML::commands_formatting)) {
+ if (!ref($Texinfo::Convert::TexinfoXML::commands_formatting{$command})) {
+ $entity_texts{$Texinfo::Convert::TexinfoXML::commands_formatting{$command}}
+ = command_with_braces($command);
+ } else {
+ my $spec = $Texinfo::Convert::TexinfoXML::commands_formatting{$command};
+ my $element = $spec->[0];
+ if ($element eq 'spacecmd') {
+ if ($spec->[1] eq 'type') {
+ $element_at_commands{$element}->{"type"}->{$spec->[2]}
+ = command_with_braces($command);
+ } else {
+ die "BUG, bad spacecmd specification";
+ }
+ } else {
+ $element_at_commands{$element} = command_with_braces($command);
+ }
+ }
+}
+
+$element_at_commands{'accent'} = 0;
+
+my %arg_elements;
+foreach my $command (keys(%Texinfo::Convert::TexinfoXML::commands_args_elements)) {
+ my $arg_index = 0;
+ foreach my $element_argument (@{$Texinfo::Convert::TexinfoXML::commands_args_elements{$command}}) {
+ $arg_elements{$element_argument} = [$arg_index, $command];
+ $arg_index++;
+ }
+}
+
+my %accent_type_command;
+foreach my $accent_command (keys(%Texinfo::Convert::TexinfoXML::accent_types)) {
+ $accent_type_command{$Texinfo::Convert::TexinfoXML::accent_types{$accent_command}}
+ = $accent_command;
+}
+
+my %eat_space_elements;
+foreach my $element ('texinfo', 'filename') {
+ $eat_space_elements{$element} = 1;
+}
+
+my $infile = shift @ARGV;
+
+if (!defined($infile) or $infile !~ /\S/) {
+ die "Missing file\n";
+}
+
+my $reader = XML::LibXML::Reader->new('location' => $infile,
+ 'expand_entities' => 0,
+ )
+ or die "cannot read $infile\n";
+
+#(my $mydir = $0) =~ s,/[^/]*$,,; # dir we are in
+#my $txi_dtd_libdir = "$mydir"; # find tp relative to $0
+
+sub skip_until_end($$)
+{
+ my $reader = shift;
+ my $name = shift;
+ while ($reader->read) {
+ if ($reader->nodeType() eq XML_READER_TYPE_END_ELEMENT
+ and $reader->name eq $name) {
+ return;
+ }
+ }
+}
+
+my $eat_space = 0;
+my @commands_with_args_stack;
+
+while ($reader->read) {
+
+ # ============================================================ begin debug
+ if ($debug) {
+ printf STDERR "(args: @commands_with_args_stack) (eat_space $eat_space) %d %d %s %d", ($reader->depth,
+ $reader->nodeType,
+ $reader->name,
+ $reader->isEmptyElement);
+ my $value = '';
+ if ($reader->hasValue()) {
+ $value = $reader->value();
+ $value =~ s/\n/\\n/g;
+ print STDERR " |$value|";
+ }
+ if ($reader->nodeType() eq XML_READER_TYPE_ELEMENT
+ and $reader->hasAttributes()
+ and defined($reader->getAttribute('spaces'))) {
+ my $spaces = $reader->getAttribute('spaces');
+ print STDERR " spaces:$spaces|";
+ }
+ print STDERR "\n";
+ }
+ # ============================================================ end debug
+
+ if ($reader->nodeType() eq XML_READER_TYPE_SIGNIFICANT_WHITESPACE
+ and $eat_space) {
+ $eat_space = 0;
+ next;
+ } elsif ($reader->nodeType() eq XML_READER_TYPE_TEXT
+ or $reader->nodeType() eq XML_READER_TYPE_WHITESPACE
+ or $reader->nodeType() eq XML_READER_TYPE_SIGNIFICANT_WHITESPACE
+ ) {
+ if ($reader->hasValue()) {
+ print $reader->value();
+ }
+ }
+ my $name = $reader->name;
+ if ($reader->nodeType() eq XML_READER_TYPE_ELEMENT) {
+ if (($name eq 'entry' or $name eq 'indexcommand')
+ and $reader->hasAttributes()
+ and defined($reader->getAttribute('command'))) {
+ $name = $reader->getAttribute('command');
+ } elsif ($name eq 'listitem') {
+ $name = 'item';
+ }
+ if ($Texinfo::Convert::TexinfoXML::commands_args_elements{$name}) {
+ push @commands_with_args_stack, 0;
+ }
+ if (exists $element_at_commands{$name}) {
+ if ($name eq 'accent') {
+ if ($reader->hasAttributes()) {
+ if (defined($reader->getAttribute('type'))) {
+ my $command = $accent_type_command{$reader->getAttribute('type')};
+ print "\@$command"
+ if (defined($command));
+ }
+ if (!defined($reader->getAttribute('spaces'))
+ and !(defined($reader->getAttribute('bracketed'))
+ and $reader->getAttribute('bracketed') eq 'off')) {
+ print '{';
+ }
+ } else {
+ print '{';
+ }
+ } elsif (!ref($element_at_commands{$name})) {
+ print $element_at_commands{$name};
+ } else {
+ my ($attribute) = keys(%{$element_at_commands{$name}});
+ if ($reader->hasAttributes()
+ and defined($reader->getAttribute($attribute))) {
+ print
+ $element_at_commands{$name}->{$attribute}->{$reader->getAttribute($attribute)};
+ }
+ }
+ } elsif (exists($Texinfo::Common::brace_commands{$name})) {
+ print "\@${name}{";
+ if ($name eq 'verb' and $reader->hasAttributes()
+ and defined($reader->getAttribute('delimiter'))) {
+ print $reader->getAttribute('delimiter');
+ }
+ } elsif (exists($Texinfo::Common::block_commands{$name})) {
+ print "\@$name";
+ if ($name eq 'macro') {
+ if ($reader->hasAttributes() and defined($reader->getAttribute('line'))) {
+ print $reader->getAttribute('line');
+ }
+ print "\n";
+ }
+ } elsif (defined($Texinfo::Common::misc_commands{$name})) {
+ if ($reader->hasAttributes()
+ and defined($reader->getAttribute('originalcommand'))) {
+ $name = $reader->getAttribute('originalcommand');
+ }
+ if ($name eq 'documentencoding' and $reader->hasAttributes()
+ and defined($reader->getAttribute('encoding'))) {
+ my ($texinfo_encoding, $perl_encoding, $output_encoding)
+ = Texinfo::Encoding::encoding_alias($reader->getAttribute('encoding'));
+
+ if (defined($perl_encoding)) {
+ if ($debug) {
+ print STDERR "Using encoding $perl_encoding\n";
+ }
+ binmode(STDOUT, ":encoding($perl_encoding)");
+ }
+ }
+ print "\@$name";
+ if ($reader->hasAttributes() and defined($reader->getAttribute('line'))) {
+ my $line = $reader->getAttribute('line');
+ $line =~ s/\\\\/\x{1F}/g;
+ $line =~ s/\\f/\f/g;
+ $line =~ s/\x{1F}/\\/g;
+ print $line;
+ }
+ if ($name eq 'set' or $name eq 'clickstyle') {
+ skip_until_end($reader, $name);
+ }
+ } elsif ($arg_elements{$name}) {
+ if ($reader->hasAttributes()
+ and defined($reader->getAttribute('automatic'))
+ and $reader->getAttribute('automatic') eq 'on') {
+ skip_until_end($reader, $name);
+ next;
+ }
+ while ($arg_elements{$name}->[0]
+ and $commands_with_args_stack[-1] < $arg_elements{$name}->[0]) {
+ $commands_with_args_stack[-1]++;
+ print ',';
+ }
+ } elsif ($ignored_elements{$name}) {
+ my $keep_indexterm = 0;
+ if ($name eq 'indexterm') {
+ my $node_path = $reader->nodePath();
+ if ($node_path =~ m:([a-z]+)/indexterm$:) {
+ my $parent = $1;
+ if ($parent =~ /^[a-z]?[a-z]index$/ or $parent eq 'indexcommand') {
+ $keep_indexterm = 1;
+ }
+ }
+ }
+ if (!$keep_indexterm) {
+ skip_until_end($reader, $name);
+ next;
+ }
+ } elsif ($name eq 'formattingcommand') {
+ if ($reader->hasAttributes()
+ and defined($reader->getAttribute('command'))) {
+ print '@'.$reader->getAttribute('command');
+ }
+ # def* automatic
+ } elsif ($reader->hasAttributes()
+ and defined($reader->getAttribute('automatic'))
+ and $reader->getAttribute('automatic') eq 'on') {
+ skip_until_end($reader, $name);
+ # eat the following space
+ $reader->read();
+ } elsif ($eat_space_elements{$name}) {
+ $eat_space = 1;
+ } else {
+ print STDERR "UNKNOWN $name\n" if ($debug);
+ }
+ if ($reader->hasAttributes()) {
+ if (defined($reader->getAttribute('bracketed'))
+ and $reader->getAttribute('bracketed') eq 'on') {
+ print '{';
+ }
+ if (defined($reader->getAttribute('spaces'))) {
+ my $spaces = $reader->getAttribute('spaces');
+ $spaces =~ s/\\n/\n/g;
+ $spaces =~ s/\\f/\f/g;
+ print $spaces;
+ }
+ if (defined($reader->getAttribute('leadingtext'))) {
+ print $reader->getAttribute('leadingtext');
+ }
+ }
+ if ($Texinfo::Common::item_line_commands{$name}
+ and $reader->hasAttributes()
+ and defined($reader->getAttribute('commandarg'))) {
+ print '@'.$reader->getAttribute('commandarg');
+ }
+ } elsif ($reader->nodeType() eq XML_READER_TYPE_END_ELEMENT) {
+ if ($Texinfo::Convert::TexinfoXML::commands_args_elements{$name}) {
+ pop @commands_with_args_stack;
+ }
+ if ($reader->hasAttributes()) {
+ if (defined($reader->getAttribute('bracketed'))
+ and $reader->getAttribute('bracketed') eq 'on') {
+ print '}';
+ }
+ }
+ if (exists ($Texinfo::Common::brace_commands{$name})) {
+ if ($name eq 'verb' and $reader->hasAttributes()
+ and defined($reader->getAttribute('delimiter'))) {
+ print $reader->getAttribute('delimiter');
+ }
+ print '}';
+ } elsif (exists($Texinfo::Common::block_commands{$name})) {
+ my $end_spaces;
+ if ($reader->hasAttributes()
+ and defined($reader->getAttribute('endspaces'))) {
+ $end_spaces = $reader->getAttribute('endspaces');
+ }
+ $end_spaces = ' ' if (!defined($end_spaces) or $end_spaces eq '');
+ print "\@end".$end_spaces."$name";
+ } elsif (defined($Texinfo::Common::misc_commands{$name})) {
+ if ($Texinfo::Common::root_commands{$name} and $name ne 'node') {
+ $eat_space = 1;
+ }
+ } elsif ($elements_end_attributes{$name}) {
+ if ($name eq 'accent') {
+ if ($reader->hasAttributes()) {
+ if (!defined($reader->getAttribute('spaces'))
+ and !(defined($reader->getAttribute('bracketed'))
+ and $reader->getAttribute('bracketed') eq 'off')) {
+ print '}';
+ }
+ } else {
+ print '}';
+ }
+ } elsif ($reader->hasAttributes()
+ and defined($reader->getAttribute('separator'))) {
+ print $reader->getAttribute('separator');
+ }
+ } elsif ($eat_space_elements{$name}) {
+ $eat_space = 1;
+ } else {
+ print STDERR "END UNKNOWN $name\n" if ($debug);
+ }
+ if ($reader->hasAttributes()
+ and defined($reader->getAttribute('trailingspaces'))) {
+ my $trailingspaces = $reader->getAttribute('trailingspaces');
+ $trailingspaces =~ s/\\f/\f/g;
+ print $trailingspaces;
+ }
+ } elsif ($reader->nodeType() eq XML_READER_TYPE_ENTITY_REFERENCE) {
+ if (defined($entity_texts{$name})) {
+ print $entity_texts{$name};
+ }
+ } elsif ($reader->nodeType() eq XML_READER_TYPE_COMMENT) {
+ my $comment;
+ if ($reader->hasValue()) {
+ $comment = $reader->value();
+ $comment =~ s/^ (comment|c)//;
+ my $command = $1;
+ $comment =~ s/ $//;
+ print "\@${command}$comment";
+ }
+ } elsif ($reader->nodeType() eq XML_READER_TYPE_DOCUMENT_TYPE) {
+ $eat_space = 1;
+ }
+}
+
+1;