diff options
author | Lorry Tar Creator <lorry-tar-importer@baserock.org> | 2013-09-26 23:06:02 +0000 |
---|---|---|
committer | <> | 2015-02-03 11:56:22 +0000 |
commit | e0b511b834f3529395df67126a7314097c2cf97e (patch) | |
tree | 89945ae53183ab2acdc61659c8b0b3e57e4a1f3a /util/txixml2texi.pl | |
parent | 2d8ae7b161658c4a589172db0072fc99f76fa979 (diff) | |
download | texinfo-tarball-e0b511b834f3529395df67126a7314097c2cf97e.tar.gz |
Imported from /home/lorry/working-area/delta_texinfo-tarball/texinfo-5.2.tar.xz.HEADtexinfo-5.2master
Diffstat (limited to 'util/txixml2texi.pl')
-rwxr-xr-x | util/txixml2texi.pl | 452 |
1 files changed, 452 insertions, 0 deletions
diff --git a/util/txixml2texi.pl b/util/txixml2texi.pl new file mode 100755 index 0000000..fa3413c --- /dev/null +++ b/util/txixml2texi.pl @@ -0,0 +1,452 @@ +#! /usr/bin/env perl +# +# texixml2texi -- convert Texinfo XML to Texinfo code +# +# Copyright 2012 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, +# or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# Original author: Patrice Dumas <pertusus@free.fr> + +use strict; +use Getopt::Long qw(GetOptions); +# for dirname. +use File::Basename; +use File::Spec; + +Getopt::Long::Configure("gnu_getopt"); + +BEGIN +{ + # emulate -w + $^W = 1; + my ($real_command_name, $command_directory, $command_suffix) + = fileparse($0, '.pl'); + + my $datadir = '@datadir@'; + my $package = '@PACKAGE@'; + my $updir = File::Spec->updir(); + + my $texinfolibdir; + my $lib_dir; + + # in-source run + if (($command_suffix eq '.pl' and !(defined($ENV{'TEXINFO_DEV_SOURCE'}) + and $ENV{'TEXINFO_DEV_SOURCE'} eq 0)) or $ENV{'TEXINFO_DEV_SOURCE'}) { + my $srcdir = defined $ENV{'srcdir'} ? $ENV{'srcdir'} : $command_directory; + $texinfolibdir = File::Spec->catdir($srcdir, $updir, 'tp'); + $lib_dir = File::Spec->catdir($texinfolibdir, 'maintain'); + unshift @INC, $texinfolibdir; + } elsif ($datadir ne '@' .'datadir@' and $package ne '@' . 'PACKAGE@' + and $datadir ne '') { + $texinfolibdir = File::Spec->catdir($datadir, $package); + # try to make package relocatable, will only work if standard relative paths + # are used + if (! -f File::Spec->catfile($texinfolibdir, 'Texinfo', 'Parser.pm') + and -f File::Spec->catfile($command_directory, $updir, 'share', + 'texinfo', 'Texinfo', 'Parser.pm')) { + $texinfolibdir = File::Spec->catdir($command_directory, $updir, + 'share', 'texinfo'); + } + $lib_dir = $texinfolibdir; + unshift @INC, $texinfolibdir; + } + + # '@USE_EXTERNAL_LIBINTL @ and similar are substituted in the + # makefile using values from configure + if (defined($texinfolibdir)) { + if ('@USE_EXTERNAL_LIBINTL@' ne 'yes') { + unshift @INC, (File::Spec->catdir($lib_dir, 'lib', 'libintl-perl', 'lib')); + } + if ('@USE_EXTERNAL_EASTASIANWIDTH@' ne 'yes') { + unshift @INC, (File::Spec->catdir($lib_dir, 'lib', 'Unicode-EastAsianWidth', 'lib')); + } + if ('@USE_EXTERNAL_UNIDECODE@' ne 'yes') { + unshift @INC, (File::Spec->catdir($lib_dir, 'lib', 'Text-Unidecode', 'lib')); + } + } +} + +use XML::LibXML::Reader; + +# gather information on Texinfo XML elements +use Texinfo::Common; +use Texinfo::Convert::TexinfoXML; + +my $debug = 0; +my $result_options = Getopt::Long::GetOptions ( + 'debug|d' => \$debug, +); + +sub command_with_braces($) +{ + my $command = shift; + if ($command =~ /^[a-z]/i) { + return "\@".$command.'{}'; + } else { + return "\@".$command; + } +} + +my %ignored_elements = ( + 'prepend' => 1, + 'formalarg' => 1, + # not ignored everytime + 'indexterm' => 1, +); + +my %elements_end_attributes = ( + 'accent' => 1, + 'menunode' => 1, + 'menutitle' => 1, +); + +my %element_at_commands; +my %entity_texts = ( + 'textldquo' => '``', + 'textrdquo' => "''", + 'textmdash' => '---', + 'textndash' => '--', + 'textrsquo' => "'", + 'textlsquo' => '`', + 'formfeed' => "\f", + # this is not used in pratice, as attrformfeed appears in an + # attribute and thus is already expanded to text. + 'attrformfeed' => "\f", +); + +foreach my $command (keys(%Texinfo::Convert::TexinfoXML::commands_formatting)) { + if (!ref($Texinfo::Convert::TexinfoXML::commands_formatting{$command})) { + $entity_texts{$Texinfo::Convert::TexinfoXML::commands_formatting{$command}} + = command_with_braces($command); + } else { + my $spec = $Texinfo::Convert::TexinfoXML::commands_formatting{$command}; + my $element = $spec->[0]; + if ($element eq 'spacecmd') { + if ($spec->[1] eq 'type') { + $element_at_commands{$element}->{"type"}->{$spec->[2]} + = command_with_braces($command); + } else { + die "BUG, bad spacecmd specification"; + } + } else { + $element_at_commands{$element} = command_with_braces($command); + } + } +} + +$element_at_commands{'accent'} = 0; + +my %arg_elements; +foreach my $command (keys(%Texinfo::Convert::TexinfoXML::commands_args_elements)) { + my $arg_index = 0; + foreach my $element_argument (@{$Texinfo::Convert::TexinfoXML::commands_args_elements{$command}}) { + $arg_elements{$element_argument} = [$arg_index, $command]; + $arg_index++; + } +} + +my %accent_type_command; +foreach my $accent_command (keys(%Texinfo::Convert::TexinfoXML::accent_types)) { + $accent_type_command{$Texinfo::Convert::TexinfoXML::accent_types{$accent_command}} + = $accent_command; +} + +my %eat_space_elements; +foreach my $element ('texinfo', 'filename') { + $eat_space_elements{$element} = 1; +} + +my $infile = shift @ARGV; + +if (!defined($infile) or $infile !~ /\S/) { + die "Missing file\n"; +} + +my $reader = XML::LibXML::Reader->new('location' => $infile, + 'expand_entities' => 0, + ) + or die "cannot read $infile\n"; + +#(my $mydir = $0) =~ s,/[^/]*$,,; # dir we are in +#my $txi_dtd_libdir = "$mydir"; # find tp relative to $0 + +sub skip_until_end($$) +{ + my $reader = shift; + my $name = shift; + while ($reader->read) { + if ($reader->nodeType() eq XML_READER_TYPE_END_ELEMENT + and $reader->name eq $name) { + return; + } + } +} + +my $eat_space = 0; +my @commands_with_args_stack; + +while ($reader->read) { + + # ============================================================ begin debug + if ($debug) { + printf STDERR "(args: @commands_with_args_stack) (eat_space $eat_space) %d %d %s %d", ($reader->depth, + $reader->nodeType, + $reader->name, + $reader->isEmptyElement); + my $value = ''; + if ($reader->hasValue()) { + $value = $reader->value(); + $value =~ s/\n/\\n/g; + print STDERR " |$value|"; + } + if ($reader->nodeType() eq XML_READER_TYPE_ELEMENT + and $reader->hasAttributes() + and defined($reader->getAttribute('spaces'))) { + my $spaces = $reader->getAttribute('spaces'); + print STDERR " spaces:$spaces|"; + } + print STDERR "\n"; + } + # ============================================================ end debug + + if ($reader->nodeType() eq XML_READER_TYPE_SIGNIFICANT_WHITESPACE + and $eat_space) { + $eat_space = 0; + next; + } elsif ($reader->nodeType() eq XML_READER_TYPE_TEXT + or $reader->nodeType() eq XML_READER_TYPE_WHITESPACE + or $reader->nodeType() eq XML_READER_TYPE_SIGNIFICANT_WHITESPACE + ) { + if ($reader->hasValue()) { + print $reader->value(); + } + } + my $name = $reader->name; + if ($reader->nodeType() eq XML_READER_TYPE_ELEMENT) { + if (($name eq 'entry' or $name eq 'indexcommand') + and $reader->hasAttributes() + and defined($reader->getAttribute('command'))) { + $name = $reader->getAttribute('command'); + } elsif ($name eq 'listitem') { + $name = 'item'; + } + if ($Texinfo::Convert::TexinfoXML::commands_args_elements{$name}) { + push @commands_with_args_stack, 0; + } + if (exists $element_at_commands{$name}) { + if ($name eq 'accent') { + if ($reader->hasAttributes()) { + if (defined($reader->getAttribute('type'))) { + my $command = $accent_type_command{$reader->getAttribute('type')}; + print "\@$command" + if (defined($command)); + } + if (!defined($reader->getAttribute('spaces')) + and !(defined($reader->getAttribute('bracketed')) + and $reader->getAttribute('bracketed') eq 'off')) { + print '{'; + } + } else { + print '{'; + } + } elsif (!ref($element_at_commands{$name})) { + print $element_at_commands{$name}; + } else { + my ($attribute) = keys(%{$element_at_commands{$name}}); + if ($reader->hasAttributes() + and defined($reader->getAttribute($attribute))) { + print + $element_at_commands{$name}->{$attribute}->{$reader->getAttribute($attribute)}; + } + } + } elsif (exists($Texinfo::Common::brace_commands{$name})) { + print "\@${name}{"; + if ($name eq 'verb' and $reader->hasAttributes() + and defined($reader->getAttribute('delimiter'))) { + print $reader->getAttribute('delimiter'); + } + } elsif (exists($Texinfo::Common::block_commands{$name})) { + print "\@$name"; + if ($name eq 'macro') { + if ($reader->hasAttributes() and defined($reader->getAttribute('line'))) { + print $reader->getAttribute('line'); + } + print "\n"; + } + } elsif (defined($Texinfo::Common::misc_commands{$name})) { + if ($reader->hasAttributes() + and defined($reader->getAttribute('originalcommand'))) { + $name = $reader->getAttribute('originalcommand'); + } + if ($name eq 'documentencoding' and $reader->hasAttributes() + and defined($reader->getAttribute('encoding'))) { + my ($texinfo_encoding, $perl_encoding, $output_encoding) + = Texinfo::Encoding::encoding_alias($reader->getAttribute('encoding')); + + if (defined($perl_encoding)) { + if ($debug) { + print STDERR "Using encoding $perl_encoding\n"; + } + binmode(STDOUT, ":encoding($perl_encoding)"); + } + } + print "\@$name"; + if ($reader->hasAttributes() and defined($reader->getAttribute('line'))) { + my $line = $reader->getAttribute('line'); + $line =~ s/\\\\/\x{1F}/g; + $line =~ s/\\f/\f/g; + $line =~ s/\x{1F}/\\/g; + print $line; + } + if ($name eq 'set' or $name eq 'clickstyle') { + skip_until_end($reader, $name); + } + } elsif ($arg_elements{$name}) { + if ($reader->hasAttributes() + and defined($reader->getAttribute('automatic')) + and $reader->getAttribute('automatic') eq 'on') { + skip_until_end($reader, $name); + next; + } + while ($arg_elements{$name}->[0] + and $commands_with_args_stack[-1] < $arg_elements{$name}->[0]) { + $commands_with_args_stack[-1]++; + print ','; + } + } elsif ($ignored_elements{$name}) { + my $keep_indexterm = 0; + if ($name eq 'indexterm') { + my $node_path = $reader->nodePath(); + if ($node_path =~ m:([a-z]+)/indexterm$:) { + my $parent = $1; + if ($parent =~ /^[a-z]?[a-z]index$/ or $parent eq 'indexcommand') { + $keep_indexterm = 1; + } + } + } + if (!$keep_indexterm) { + skip_until_end($reader, $name); + next; + } + } elsif ($name eq 'formattingcommand') { + if ($reader->hasAttributes() + and defined($reader->getAttribute('command'))) { + print '@'.$reader->getAttribute('command'); + } + # def* automatic + } elsif ($reader->hasAttributes() + and defined($reader->getAttribute('automatic')) + and $reader->getAttribute('automatic') eq 'on') { + skip_until_end($reader, $name); + # eat the following space + $reader->read(); + } elsif ($eat_space_elements{$name}) { + $eat_space = 1; + } else { + print STDERR "UNKNOWN $name\n" if ($debug); + } + if ($reader->hasAttributes()) { + if (defined($reader->getAttribute('bracketed')) + and $reader->getAttribute('bracketed') eq 'on') { + print '{'; + } + if (defined($reader->getAttribute('spaces'))) { + my $spaces = $reader->getAttribute('spaces'); + $spaces =~ s/\\n/\n/g; + $spaces =~ s/\\f/\f/g; + print $spaces; + } + if (defined($reader->getAttribute('leadingtext'))) { + print $reader->getAttribute('leadingtext'); + } + } + if ($Texinfo::Common::item_line_commands{$name} + and $reader->hasAttributes() + and defined($reader->getAttribute('commandarg'))) { + print '@'.$reader->getAttribute('commandarg'); + } + } elsif ($reader->nodeType() eq XML_READER_TYPE_END_ELEMENT) { + if ($Texinfo::Convert::TexinfoXML::commands_args_elements{$name}) { + pop @commands_with_args_stack; + } + if ($reader->hasAttributes()) { + if (defined($reader->getAttribute('bracketed')) + and $reader->getAttribute('bracketed') eq 'on') { + print '}'; + } + } + if (exists ($Texinfo::Common::brace_commands{$name})) { + if ($name eq 'verb' and $reader->hasAttributes() + and defined($reader->getAttribute('delimiter'))) { + print $reader->getAttribute('delimiter'); + } + print '}'; + } elsif (exists($Texinfo::Common::block_commands{$name})) { + my $end_spaces; + if ($reader->hasAttributes() + and defined($reader->getAttribute('endspaces'))) { + $end_spaces = $reader->getAttribute('endspaces'); + } + $end_spaces = ' ' if (!defined($end_spaces) or $end_spaces eq ''); + print "\@end".$end_spaces."$name"; + } elsif (defined($Texinfo::Common::misc_commands{$name})) { + if ($Texinfo::Common::root_commands{$name} and $name ne 'node') { + $eat_space = 1; + } + } elsif ($elements_end_attributes{$name}) { + if ($name eq 'accent') { + if ($reader->hasAttributes()) { + if (!defined($reader->getAttribute('spaces')) + and !(defined($reader->getAttribute('bracketed')) + and $reader->getAttribute('bracketed') eq 'off')) { + print '}'; + } + } else { + print '}'; + } + } elsif ($reader->hasAttributes() + and defined($reader->getAttribute('separator'))) { + print $reader->getAttribute('separator'); + } + } elsif ($eat_space_elements{$name}) { + $eat_space = 1; + } else { + print STDERR "END UNKNOWN $name\n" if ($debug); + } + if ($reader->hasAttributes() + and defined($reader->getAttribute('trailingspaces'))) { + my $trailingspaces = $reader->getAttribute('trailingspaces'); + $trailingspaces =~ s/\\f/\f/g; + print $trailingspaces; + } + } elsif ($reader->nodeType() eq XML_READER_TYPE_ENTITY_REFERENCE) { + if (defined($entity_texts{$name})) { + print $entity_texts{$name}; + } + } elsif ($reader->nodeType() eq XML_READER_TYPE_COMMENT) { + my $comment; + if ($reader->hasValue()) { + $comment = $reader->value(); + $comment =~ s/^ (comment|c)//; + my $command = $1; + $comment =~ s/ $//; + print "\@${command}$comment"; + } + } elsif ($reader->nodeType() eq XML_READER_TYPE_DOCUMENT_TYPE) { + $eat_space = 1; + } +} + +1; |