* src/preproc/preconv/preconv.cpp (do_file): Don't pass BOM to

`conversion_utf8'. * src/preproc/preconv/preconv.man: New file. Not complete yet. * src/preproc/preconv/Makefile.sub (MAN1): New variable.
author: wl <wl> 2006-01-05 15:45:00 +0000
committer: wl <wl> 2006-01-05 15:45:00 +0000
commit: 595c39b716c8b4b31587bde4fdaaed565836bafa (patch)
tree: 53b88f6138e63589d08a39ae2ebdf025aaa8bd19 /src/preproc/preconv
parent: 6d141e4c4e8d16b60e6a50d15cf80630443d00d8 (diff)
download: groff-595c39b716c8b4b31587bde4fdaaed565836bafa.tar.gz
3 files changed, 165 insertions, 5 deletions
diff --git a/src/preproc/preconv/Makefile.sub b/src/preproc/preconv/Makefile.sub
index c5f2c9d5..e53050f1 100644
--- a/src/preproc/preconv/Makefile.sub
+++ b/src/preproc/preconv/Makefile.sub
@@ -1,5 +1,5 @@
 PROG=preconv$(EXEEXT)
-# MAN1=preconv.n
+MAN1=preconv.n
 XLIBS=$(LIBGROFF)
 MLIB=$(LIBM)
 EXTRA_LDFLAGS=$(LIBICONV)
diff --git a/src/preproc/preconv/preconv.cpp b/src/preproc/preconv/preconv.cpp
index 0897c30d..e3a460e1 100644
--- a/src/preproc/preconv/preconv.cpp
+++ b/src/preproc/preconv/preconv.cpp
@@ -1051,20 +1051,19 @@ do_file(const char *filename)
   }
   if (debug_flag)
     fprintf(stderr, "  encoding used: `%s'\n", encoding);
-  data = BOM + data;
   if (!raw_flag)
     printf(".lf 1 %s\n", filename);
   int success = 1;
   // Call converter (converters write to stdout).
   if (!strcasecmp(encoding, "ISO-8859-1"))
-    conversion_latin1(fp, data);
+    conversion_latin1(fp, BOM + data);
   else if (!strcasecmp(encoding, "UTF-8"))
     conversion_utf8(fp, data);
   else if (!strcasecmp(encoding, "cp1047"))
-    conversion_cp1047(fp, data);
+    conversion_cp1047(fp, BOM + data);
   else {
 #if HAVE_ICONV
-    conversion_iconv(fp, data, encoding);
+    conversion_iconv(fp, BOM + data, encoding);
 #else
     error("encoding system `%1' not supported", encoding);
     success = 0;
diff --git a/src/preproc/preconv/preconv.man b/src/preproc/preconv/preconv.man
new file mode 100644
index 00000000..23c14c7d
--- /dev/null
+++ b/src/preproc/preconv/preconv.man
@@ -0,0 +1,161 @@
+.ig
+Copyright (C) 2006 Free Software Foundation, Inc.
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided that the
+entire resulting derived work is distributed under the terms of a
+permission notice identical to this one.
+
+Permission is granted to copy and distribute translations of this
+manual into another language, under the above conditions for modified
+versions, except that this permission notice may be included in
+translations approved by the Free Software Foundation instead of in
+the original English.
+..
+.
+.TH PRECONV @MAN1EXT@ "@MDATE@" "Groff Version @VERSION@"
+.
+.
+.SH NAME
+preconv \- convert encoding of input files to something GNU troff understands
+.
+.
+.SH SYNOPSIS
+.B preconv
+[
+.B \-dhrv
+]
+[
+.BI \-e encoding
+]
+[
+.IR files \|.\|.\|.\|
+]
+.
+.PP
+It is possible to have whitespace between the
+.B \-e
+command line option and its parameter.
+.
+.
+.SH DESCRIPTION
+.B preconv
+reads
+.I files
+and converts their encoding(s) to a form GNU
+.BR troff (@MAN1EXT@)
+can process, sending the data to standard output.
+Currently, this means ASCII characters and `\e[uXXXX]' entities, where
+`XXXX' is a hexadecimal number with four to six digits, representing a
+Unicode input code.
+Normally,
+.B preconv
+should be invoked with the
+.B \-k
+and
+.B \-K
+options of
+.BR groff .
+.
+.
+.SH OPTIONS
+.TP
+.B \-d
+Emit debugging messages to standard error (mainly the encoding used).
+.
+.TP
+.BI \-e encoding
+Specify input encoding explicitly, overriding all other methods.
+This corresponds to
+.BR groff 's
+.BI \-K encoding
+option.
+Without this switch,
+.B preconv
+uses the algorithm described below to select the input encoding.
+.
+.TP
+.B \-h
+Print help message.
+.
+.TP
+.B \-r
+Do not add .lf requests.
+.
+.TP
+.B \-v
+Print version number.
+.
+.
+.SH USAGE
+.B preconv
+tries to find the input encoding with the following algorithm.
+.
+.IP 1.
+If the input encoding has been explicitly specified with option
+.BR \-e ,
+use it.
+.
+.IP 2.
+Otherwise, check whether the input starts with a
+.I Byte Order Mark
+(BOM, see below).
+If found, use it.
+.
+.IP 3.
+Finally, check whether there is a known
+.I coding tag
+(see below) in either the first or second input line.
+If found, use it.
+.
+.IP 4.
+If everything fails, use a default encoding as given by the current locale,
+or `latin1' if the locale is set to `C', `POSIX', or empty.
+.
+.PP
+Note that the
+.B groff
+program supports a
+.B GROFF_ENCODING
+environment variable which is eventually expanded to option
+.BR \-k .
+.
+.SS "Byte Order Mark"
+The Unicode Standard defines character U+FEFF as the Byte Order Mark
+(BOM).
+On the other hand, value U+FFFE is guaranteed not to be a Unicode character at
+all.
+This makes it possible to detect the byte order within the data stream (either
+big-endian or little-endian), and the MIME encodings `UTF-16' and `UTF-32'
+mandate that the data stream starts with U+FEFF.
+Similarly, the data stream encoded as `UTF-8' might start with a BOM (to
+ease the conversion from and to UTF-16 and UTF-32).
+In all cases, the byte order mark is
+.I not
+part of the data but part of the encoding protocol; in other words,
+.BR preconv 's
+output doesn't contain it.
+.
+.PP
+Note that a U+FEFF not at the start of the input data is actually emitted;
+it then has the meaning of a `zero width no-break space' character \[en]
+something not needed normally in
+.BR groff .
+.
+.SS "Coding Tags"
+To be written.
+.
+.SS "Iconv Issues"
+To be written.
+.
+.
+.SH "SEE ALSO"
+.BR groff (@MAN1EXT@)
+.
+.\" Local Variables:
+.\" mode: nroff
+.\" End:
author	wl <wl>	2006-01-05 15:45:00 +0000
committer	wl <wl>	2006-01-05 15:45:00 +0000
commit	595c39b716c8b4b31587bde4fdaaed565836bafa (patch)
tree	53b88f6138e63589d08a39ae2ebdf025aaa8bd19 /src/preproc/preconv
parent	6d141e4c4e8d16b60e6a50d15cf80630443d00d8 (diff)
download	groff-595c39b716c8b4b31587bde4fdaaed565836bafa.tar.gz