diff options
author | tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4> | 2000-11-01 17:00:02 +0000 |
---|---|---|
committer | tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4> | 2000-11-01 17:00:02 +0000 |
commit | 3b653a71f8a9df17ca147173a6ecae85a4b561e3 (patch) | |
tree | eb0fff5301aba42e99bf81089d2c71311deab8ad /libjava | |
parent | 45c904cd51b265948e43d575055347dadc8b18cf (diff) | |
download | gcc-3b653a71f8a9df17ca147173a6ecae85a4b561e3.tar.gz |
* scripts/encodings.pl: Added `ASCII' alias.
* Makefile.in: Rebuilt.
* Makefile.am (convert_source_files): Added new files.
* gnu/gcj/convert/Input_ASCII.java: New file.
* gnu/gcj/convert/Output_ASCII.java: New file.
* gnu/gcj/convert/Output_8859_1.java (write): Use `?' to represent
out-of-range characters.
* gnu/gcj/convert/natIconv.cc (iconv_init): New method.
(read): Swap bytes if required. Treat `count' as character count,
not byte count.
(write): Likewise. Also, handle case where iconv fails on a given
character.
(init): Put encoding into exception.
* gnu/gcj/convert/IOConverter.java (iconv_byte_swap): New global.
(static): Call iconv_init. Rebuilt alias list.
(iconv_init): New private method.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@37190 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libjava')
-rw-r--r-- | libjava/ChangeLog | 19 | ||||
-rw-r--r-- | libjava/Makefile.am | 2 | ||||
-rw-r--r-- | libjava/Makefile.in | 4 | ||||
-rw-r--r-- | libjava/gnu/gcj/convert/IOConverter.java | 19 | ||||
-rw-r--r-- | libjava/gnu/gcj/convert/Input_8859_1.java | 4 | ||||
-rw-r--r-- | libjava/gnu/gcj/convert/Input_ASCII.java | 37 | ||||
-rw-r--r-- | libjava/gnu/gcj/convert/Output_8859_1.java | 12 | ||||
-rw-r--r-- | libjava/gnu/gcj/convert/Output_ASCII.java | 54 | ||||
-rw-r--r-- | libjava/gnu/gcj/convert/natIconv.cc | 115 | ||||
-rw-r--r-- | libjava/scripts/encodings.pl | 1 |
10 files changed, 246 insertions, 21 deletions
diff --git a/libjava/ChangeLog b/libjava/ChangeLog index 0fe0ed3065f..1901cfce861 100644 --- a/libjava/ChangeLog +++ b/libjava/ChangeLog @@ -1,5 +1,24 @@ 2000-11-01 Tom Tromey <tromey@cygnus.com> + * scripts/encodings.pl: Added `ASCII' alias. + * Makefile.in: Rebuilt. + * Makefile.am (convert_source_files): Added new files. + * gnu/gcj/convert/Input_ASCII.java: New file. + * gnu/gcj/convert/Output_ASCII.java: New file. + * gnu/gcj/convert/Output_8859_1.java (write): Use `?' to represent + out-of-range characters. + * gnu/gcj/convert/natIconv.cc (iconv_init): New method. + (read): Swap bytes if required. Treat `count' as character count, + not byte count. + (write): Likewise. Also, handle case where iconv fails on a given + character. + (init): Put encoding into exception. + * gnu/gcj/convert/IOConverter.java (iconv_byte_swap): New global. + (static): Call iconv_init. Rebuilt alias list. + (iconv_init): New private method. + +2000-11-01 Tom Tromey <tromey@cygnus.com> + * Makefile.in: Rebuilt. * Makefile.am (install-exec-hook): Only make a single symlink, and remove the destination before making the link. 
diff --git a/libjava/Makefile.am b/libjava/Makefile.am index 8a5372d26c2..d3fcd9b014c 100644 --- a/libjava/Makefile.am +++ b/libjava/Makefile.am @@ -506,6 +506,7 @@ convert_source_files = \ gnu/gcj/convert/BytesToUnicode.java \ gnu/gcj/convert/Convert.java \ gnu/gcj/convert/Input_8859_1.java \ +gnu/gcj/convert/Input_ASCII.java \ gnu/gcj/convert/Input_EUCJIS.java \ gnu/gcj/convert/Input_JavaSrc.java \ gnu/gcj/convert/Input_SJIS.java \ @@ -513,6 +514,7 @@ gnu/gcj/convert/Input_UTF8.java \ gnu/gcj/convert/Input_iconv.java \ gnu/gcj/convert/IOConverter.java \ gnu/gcj/convert/Output_8859_1.java \ +gnu/gcj/convert/Output_ASCII.java \ gnu/gcj/convert/Output_EUCJIS.java \ gnu/gcj/convert/Output_JavaSrc.java \ gnu/gcj/convert/Output_SJIS.java \ diff --git a/libjava/Makefile.in b/libjava/Makefile.in index f1d15018d29..7ebd6cebe93 100644 --- a/libjava/Makefile.in +++ b/libjava/Makefile.in @@ -280,6 +280,7 @@ convert_source_files = \ gnu/gcj/convert/BytesToUnicode.java \ gnu/gcj/convert/Convert.java \ gnu/gcj/convert/Input_8859_1.java \ +gnu/gcj/convert/Input_ASCII.java \ gnu/gcj/convert/Input_EUCJIS.java \ gnu/gcj/convert/Input_JavaSrc.java \ gnu/gcj/convert/Input_SJIS.java \ @@ -287,6 +288,7 @@ gnu/gcj/convert/Input_UTF8.java \ gnu/gcj/convert/Input_iconv.java \ gnu/gcj/convert/IOConverter.java \ gnu/gcj/convert/Output_8859_1.java \ +gnu/gcj/convert/Output_ASCII.java \ gnu/gcj/convert/Output_EUCJIS.java \ gnu/gcj/convert/Output_JavaSrc.java \ gnu/gcj/convert/Output_SJIS.java \ @@ -1197,6 +1199,7 @@ DEP_FILES = .deps/$(srcdir)/$(CONVERT_DIR)/gen-from-JIS.P \ .deps/gnu/gcj/convert/BytesToUnicode.P .deps/gnu/gcj/convert/Convert.P \ .deps/gnu/gcj/convert/IOConverter.P \ .deps/gnu/gcj/convert/Input_8859_1.P \ +.deps/gnu/gcj/convert/Input_ASCII.P \ .deps/gnu/gcj/convert/Input_EUCJIS.P \ .deps/gnu/gcj/convert/Input_JavaSrc.P \ .deps/gnu/gcj/convert/Input_SJIS.P .deps/gnu/gcj/convert/Input_UTF8.P \ @@ -1204,6 +1207,7 @@ DEP_FILES = .deps/$(srcdir)/$(CONVERT_DIR)/gen-from-JIS.P \ 
.deps/gnu/gcj/convert/JIS0208_to_Unicode.P \ .deps/gnu/gcj/convert/JIS0212_to_Unicode.P \ .deps/gnu/gcj/convert/Output_8859_1.P \ +.deps/gnu/gcj/convert/Output_ASCII.P \ .deps/gnu/gcj/convert/Output_EUCJIS.P \ .deps/gnu/gcj/convert/Output_JavaSrc.P \ .deps/gnu/gcj/convert/Output_SJIS.P .deps/gnu/gcj/convert/Output_UTF8.P \ diff --git a/libjava/gnu/gcj/convert/IOConverter.java b/libjava/gnu/gcj/convert/IOConverter.java index c98662485b6..9b5fbad00f4 100644 --- a/libjava/gnu/gcj/convert/IOConverter.java +++ b/libjava/gnu/gcj/convert/IOConverter.java @@ -18,6 +18,10 @@ public abstract class IOConverter // Map encoding aliases to our canonical form. static private Hashtable hash = new Hashtable (); + // True if we have to do byte-order conversions on iconv() + // arguments. + static protected boolean iconv_byte_swap; + static { // Manually maintained aliases. Note that the value must be our @@ -25,6 +29,17 @@ public abstract class IOConverter hash.put ("ISO-Latin-1", "8859_1"); // All aliases after this point are automatically generated by the // `encodings.pl' script. Run it to make any corrections. + hash.put ("ANSI_X3.4-1968", "ASCII"); + hash.put ("iso-ir-6", "ASCII"); + hash.put ("ANSI_X3.4-1986", "ASCII"); + hash.put ("ISO_646.irv:1991", "ASCII"); + hash.put ("ASCII", "ASCII"); + hash.put ("ISO646-US", "ASCII"); + hash.put ("US-ASCII", "ASCII"); + hash.put ("us", "ASCII"); + hash.put ("IBM367", "ASCII"); + hash.put ("cp367", "ASCII"); + hash.put ("csASCII", "ASCII"); hash.put ("ISO_8859-1:1987", "8859_1"); hash.put ("iso-ir-100", "8859_1"); hash.put ("ISO_8859-1", "8859_1"); @@ -41,8 +56,12 @@ public abstract class IOConverter hash.put ("Extended_UNIX_Code_Packed_Format_for_Japanese", "EUCJIS"); hash.put ("csEUCPkdFmtJapanese", "EUCJIS"); hash.put ("EUC-JP", "EUCJIS"); + + iconv_byte_swap = iconv_init (); } + private static native boolean iconv_init (); + // Turn an alias into the canonical form. 
protected static final String canonicalize (String name) { diff --git a/libjava/gnu/gcj/convert/Input_8859_1.java b/libjava/gnu/gcj/convert/Input_8859_1.java index 6c70034f3d9..bd5f7798086 100644 --- a/libjava/gnu/gcj/convert/Input_8859_1.java +++ b/libjava/gnu/gcj/convert/Input_8859_1.java @@ -1,4 +1,4 @@ -/* Copyright (C) 1999 Free Software Foundation +/* Copyright (C) 1999, 2000 Free Software Foundation This file is part of libgcj. @@ -9,7 +9,7 @@ details. */ package gnu.gcj.convert; /** - * Convert ISO-Latin-1 (8851-1) text to Unicode. + * Convert ISO-Latin-1 (8859-1) text to Unicode. * @author Per Bothner <bothner@cygnus.com> * @date March 1999. */ diff --git a/libjava/gnu/gcj/convert/Input_ASCII.java b/libjava/gnu/gcj/convert/Input_ASCII.java new file mode 100644 index 00000000000..cb531e980d0 --- /dev/null +++ b/libjava/gnu/gcj/convert/Input_ASCII.java @@ -0,0 +1,37 @@ +/* Copyright (C) 2000 Free Software Foundation + + This file is part of libgcj. + +This software is copyrighted work licensed under the terms of the +Libgcj License. Please consult the file "LIBGCJ_LICENSE" for +details. */ + +package gnu.gcj.convert; + +/** + * Convert ASCII text to Unicode. + * @date October 2000 + */ + +public class Input_ASCII extends BytesToUnicode +{ + public String getName() { return "ASCII"; } + + public int read (char[] outbuffer, int outpos, int count) + { + int origpos = outpos; + // Make sure fields of this are in registers. 
+ int inpos = this.inpos; + byte[] inbuffer = this.inbuffer; + int inavail = this.inlength - inpos; + int outavail = count; + if (outavail > inavail) + outavail = inavail; + while (--outavail >= 0) + { + outbuffer[outpos++] = (char) (inbuffer[inpos++] & 0x7f); + } + this.inpos = inpos; + return outpos - origpos; + } +} diff --git a/libjava/gnu/gcj/convert/Output_8859_1.java b/libjava/gnu/gcj/convert/Output_8859_1.java index ac04ad67ac1..7ae6a615f23 100644 --- a/libjava/gnu/gcj/convert/Output_8859_1.java +++ b/libjava/gnu/gcj/convert/Output_8859_1.java @@ -1,4 +1,4 @@ -/* Copyright (C) 1999 Free Software Foundation +/* Copyright (C) 1999, 2000 Free Software Foundation This file is part of libgcj. @@ -10,9 +10,9 @@ package gnu.gcj.convert; /** * Convert Unicode ISO-Latin-1 (8851-1) text. - * The high-order byte of each character is truncated. + * Unrecognized characters are printed as `?'. * @author Per Bothner <bothner@cygnus.com> - * @date Match 1999. + * @date March 1999. */ public class Output_8859_1 extends UnicodeToBytes @@ -30,7 +30,8 @@ public class Output_8859_1 extends UnicodeToBytes inlength = avail; for (int i = inlength; --i >= 0; ) { - buf[count++] = (byte) inbuffer[inpos++]; + char c = inbuffer[inpos++]; + buf[count++] = (byte) ((c > 0xff) ? '?' : c); } this.count = count; return inlength; @@ -45,7 +46,8 @@ public class Output_8859_1 extends UnicodeToBytes inlength = avail; for (int i = inlength; --i >= 0; ) { - buf[count++] = (byte) str.charAt(inpos++); + char c = str.charAt(inpos++); + buf[count++] = (byte) ((c > 0xff) ? '?' : c); } this.count = count; return inlength; diff --git a/libjava/gnu/gcj/convert/Output_ASCII.java b/libjava/gnu/gcj/convert/Output_ASCII.java new file mode 100644 index 00000000000..9f336452501 --- /dev/null +++ b/libjava/gnu/gcj/convert/Output_ASCII.java @@ -0,0 +1,54 @@ +/* Copyright (C) 2000 Free Software Foundation + + This file is part of libgcj. 
+ +This software is copyrighted work licensed under the terms of the +Libgcj License. Please consult the file "LIBGCJ_LICENSE" for +details. */ + +package gnu.gcj.convert; + +/** + * Convert Unicode to ASCII. + * Unrecognized characters are printed as `?'. + * @date October 2000 + */ + +public class Output_ASCII extends UnicodeToBytes +{ + public String getName() { return "ASCII"; } + + /** + * @return number of chars converted. */ + public int write (char[] inbuffer, int inpos, int inlength) + { + int count = this.count; + byte[] buf = this.buf; + int avail = buf.length - count; + if (inlength > avail) + inlength = avail; + for (int i = inlength; --i >= 0; ) + { + char c = inbuffer[inpos++]; + buf[count++] = (byte) ((c > 0x7f) ? '?' : c); + } + this.count = count; + return inlength; + } + + public int write (String str, int inpos, int inlength, char[] work) + { + int count = this.count; + byte[] buf = this.buf; + int avail = buf.length - count; + if (inlength > avail) + inlength = avail; + for (int i = inlength; --i >= 0; ) + { + char c = str.charAt(inpos++); + buf[count++] = (byte) ((c > 0x7f) ? '?' : c); + } + this.count = count; + return inlength; + } +} diff --git a/libjava/gnu/gcj/convert/natIconv.cc b/libjava/gnu/gcj/convert/natIconv.cc index 061779c02b8..d346b1488f2 100644 --- a/libjava/gnu/gcj/convert/natIconv.cc +++ b/libjava/gnu/gcj/convert/natIconv.cc @@ -44,13 +44,13 @@ gnu::gcj::convert::Input_iconv::init (jstring encoding) iconv_t h = iconv_open ("UCS-2", buffer); if (h == (iconv_t) -1) - JvThrow (new java::io::UnsupportedEncodingException); + throw new java::io::UnsupportedEncodingException (encoding); JvAssert (h != NULL); handle = reinterpret_cast<gnu::gcj::RawData *> (h); #else /* HAVE_ICONV */ // If no iconv, just throw an exception. 
- JvThrow (new java::io::UnsupportedEncodingException); + throw new java::io::UnsupportedEncodingException (encoding); #endif /* HAVE_ICONV */ } @@ -75,7 +75,7 @@ gnu::gcj::convert::Input_iconv::read (jcharArray outbuffer, jchar *out = elements (outbuffer); size_t inavail = inlength - inpos; size_t old_in = inavail; - size_t outavail = count; + size_t outavail = count * sizeof (jchar); size_t old_out = outavail; char *inbuf = (char *) &bytes[inpos]; @@ -86,8 +86,20 @@ gnu::gcj::convert::Input_iconv::read (jcharArray outbuffer, &outbuf, &outavail); // FIXME: what if R==-1? + if (iconv_byte_swap) + { + size_t max = (old_out - outavail) / sizeof (jchar); + for (size_t i = 0; i < max; ++i) + { + // Byte swap. + jchar c = (((out[outpos + i] & 0xff) << 8) + | ((out[outpos + i] >> 8) & 0xff)); + outbuf[i] = c; + } + } + inpos += old_in - inavail; - return old_out - outavail; + return (old_out - outavail) / sizeof (jchar); #else /* HAVE_ICONV */ return -1; #endif /* HAVE_ICONV */ @@ -104,13 +116,13 @@ gnu::gcj::convert::Output_iconv::init (jstring encoding) iconv_t h = iconv_open (buffer, "UCS-2"); if (h == (iconv_t) -1) - JvThrow (new java::io::UnsupportedEncodingException); + throw new java::io::UnsupportedEncodingException (encoding); JvAssert (h != NULL); handle = reinterpret_cast<gnu::gcj::RawData *> (h); #else /* HAVE_ICONV */ // If no iconv, just throw an exception. 
- JvThrow (new java::io::UnsupportedEncodingException); + throw new java::io::UnsupportedEncodingException (encoding); #endif /* HAVE_ICONV */ } @@ -128,14 +140,15 @@ gnu::gcj::convert::Output_iconv::finalize (void) jint gnu::gcj::convert::Output_iconv::write (jcharArray inbuffer, - jint inpos, jint count) + jint inpos, jint inlength) { #ifdef HAVE_ICONV jchar *chars = elements (inbuffer); jbyte *out = elements (buf); + jchar *temp_buffer = NULL; - size_t inavail = count; - size_t old_in = count; + size_t inavail = inlength * sizeof (jchar); + size_t old_in = inavail; size_t outavail = buf->length - count; size_t old_out = outavail; @@ -143,14 +156,88 @@ gnu::gcj::convert::Output_iconv::write (jcharArray inbuffer, char *inbuf = (char *) &chars[inpos]; char *outbuf = (char *) &out[count]; - size_t r = iconv_adapter (iconv, (iconv_t) handle, - &inbuf, &inavail, - &outbuf, &outavail); - // FIXME: what if R==-1? + if (iconv_byte_swap) + { + // Ugly performance penalty -- don't use losing systems! + temp_buffer = (jchar *) _Jv_Malloc (inlength * sizeof (jchar)); + for (int i = 0; i < inlength; ++i) + { + // Byte swap. + jchar c = (((chars[inpos + i] & 0xff) << 8) + | ((chars[inpos + i] >> 8) & 0xff)); + temp_buffer[i] = c; + } + inbuf = (char *) temp_buffer; + } + + // If the conversion fails on the very first character, then we + // assume that the character can't be represented in the output + // encoding. There's nothing useful we can do here, so we simply + // omit that character. Note that we can't check `errno' because + // glibc 2.1.3 doesn't set it correctly. We could check it if we + // really needed to, but we'd have to disable support for 2.1.3. 
+ size_t loop_old_in = old_in; + while (1) + { + size_t r = iconv_adapter (iconv, (iconv_t) handle, + &inbuf, &inavail, + &outbuf, &outavail); + if (r == -1 && inavail == loop_old_in) + { + inavail -= 2; + if (inavail == 0) + break; + loop_old_in -= 2; + inbuf += 2; + } + else + break; + } + + if (temp_buffer != NULL) + _Jv_Free (temp_buffer); count += old_out - outavail; - return old_in - inavail; + return (old_in - inavail) / sizeof (jchar); #else /* HAVE_ICONV */ return -1; #endif /* HAVE_ICONV */ } + +jboolean +gnu::gcj::convert::IOConverter::iconv_init (void) +{ + // Some versions of iconv() always return their UCS-2 results in + // big-endian order, and they also require UCS-2 inputs to be in + // big-endian order. For instance, glibc 2.1.3 does this. If the + // UTF-8=>UCS-2 iconv converter has this feature, then we assume + // that all UCS-2 converters do. (This might not be the best + // heuristic, but it is all we've got.) + jboolean result = false; +#ifdef HAVE_ICONV + iconv_t handle = iconv_open ("UCS-2", "UTF-8"); + if (handle != (iconv_t) -1) + { + jchar c; + unsigned char in[3]; + char *inp, *outp; + size_t inc, outc, r; + + // This is the UTF-8 encoding of \ufeff. + in[0] = 0xef; + in[1] = 0xbb; + in[2] = 0xbf; + + inp = (char *) in; + inc = 3; + outp = (char *) &c; + outc = 2; + + r = iconv_adapter (iconv, handle, &inp, &inc, &outp, &outc); + // Conversion must be complete for us to use the result. + if (r != (size_t) -1 && inc == 0 && outc == 0) + result = (c != 0xfeff); + } +#endif /* HAVE_ICONV */ + return result; +} diff --git a/libjava/scripts/encodings.pl b/libjava/scripts/encodings.pl index f2f649959ce..4c7f0579534 100644 --- a/libjava/scripts/encodings.pl +++ b/libjava/scripts/encodings.pl @@ -4,6 +4,7 @@ # Map IANA canonical names onto our canonical names. %map = ( + 'ANSI_X3.4-1968' => 'ASCII', 'ISO_8859-1:1987' => '8859_1', 'UTF-8' => 'UTF8', 'Shift_JIS' => 'SJIS', |