diff options
Diffstat (limited to 'libjava/gnu/gcj/convert/natIconv.cc')
-rw-r--r-- | libjava/gnu/gcj/convert/natIconv.cc | 115 |
1 files changed, 101 insertions, 14 deletions
diff --git a/libjava/gnu/gcj/convert/natIconv.cc b/libjava/gnu/gcj/convert/natIconv.cc index 061779c02b8..d346b1488f2 100644 --- a/libjava/gnu/gcj/convert/natIconv.cc +++ b/libjava/gnu/gcj/convert/natIconv.cc @@ -44,13 +44,13 @@ gnu::gcj::convert::Input_iconv::init (jstring encoding) iconv_t h = iconv_open ("UCS-2", buffer); if (h == (iconv_t) -1) - JvThrow (new java::io::UnsupportedEncodingException); + throw new java::io::UnsupportedEncodingException (encoding); JvAssert (h != NULL); handle = reinterpret_cast<gnu::gcj::RawData *> (h); #else /* HAVE_ICONV */ // If no iconv, just throw an exception. - JvThrow (new java::io::UnsupportedEncodingException); + throw new java::io::UnsupportedEncodingException (encoding); #endif /* HAVE_ICONV */ } @@ -75,7 +75,7 @@ gnu::gcj::convert::Input_iconv::read (jcharArray outbuffer, jchar *out = elements (outbuffer); size_t inavail = inlength - inpos; size_t old_in = inavail; - size_t outavail = count; + size_t outavail = count * sizeof (jchar); size_t old_out = outavail; char *inbuf = (char *) &bytes[inpos]; @@ -86,8 +86,20 @@ gnu::gcj::convert::Input_iconv::read (jcharArray outbuffer, &outbuf, &outavail); // FIXME: what if R==-1? + if (iconv_byte_swap) + { + size_t max = (old_out - outavail) / sizeof (jchar); + for (size_t i = 0; i < max; ++i) + { + // Byte swap. + jchar c = (((out[outpos + i] & 0xff) << 8) + | ((out[outpos + i] >> 8) & 0xff)); + outbuf[i] = c; + } + } + inpos += old_in - inavail; - return old_out - outavail; + return (old_out - outavail) / sizeof (jchar); #else /* HAVE_ICONV */ return -1; #endif /* HAVE_ICONV */ @@ -104,13 +116,13 @@ gnu::gcj::convert::Output_iconv::init (jstring encoding) iconv_t h = iconv_open (buffer, "UCS-2"); if (h == (iconv_t) -1) - JvThrow (new java::io::UnsupportedEncodingException); + throw new java::io::UnsupportedEncodingException (encoding); JvAssert (h != NULL); handle = reinterpret_cast<gnu::gcj::RawData *> (h); #else /* HAVE_ICONV */ // If no iconv, just throw an exception. 
- JvThrow (new java::io::UnsupportedEncodingException); + throw new java::io::UnsupportedEncodingException (encoding); #endif /* HAVE_ICONV */ } @@ -128,14 +140,15 @@ gnu::gcj::convert::Output_iconv::finalize (void) jint gnu::gcj::convert::Output_iconv::write (jcharArray inbuffer, - jint inpos, jint count) + jint inpos, jint inlength) { #ifdef HAVE_ICONV jchar *chars = elements (inbuffer); jbyte *out = elements (buf); + jchar *temp_buffer = NULL; - size_t inavail = count; - size_t old_in = count; + size_t inavail = inlength * sizeof (jchar); + size_t old_in = inavail; size_t outavail = buf->length - count; size_t old_out = outavail; @@ -143,14 +156,88 @@ gnu::gcj::convert::Output_iconv::write (jcharArray inbuffer, char *inbuf = (char *) &chars[inpos]; char *outbuf = (char *) &out[count]; - size_t r = iconv_adapter (iconv, (iconv_t) handle, - &inbuf, &inavail, - &outbuf, &outavail); - // FIXME: what if R==-1? + if (iconv_byte_swap) + { + // Ugly performance penalty -- don't use losing systems! + temp_buffer = (jchar *) _Jv_Malloc (inlength * sizeof (jchar)); + for (int i = 0; i < inlength; ++i) + { + // Byte swap. + jchar c = (((chars[inpos + i] & 0xff) << 8) + | ((chars[inpos + i] >> 8) & 0xff)); + temp_buffer[i] = c; + } + inbuf = (char *) temp_buffer; + } + + // If the conversion fails on the very first character, then we + // assume that the character can't be represented in the output + // encoding. There's nothing useful we can do here, so we simply + // omit that character. Note that we can't check `errno' because + // glibc 2.1.3 doesn't set it correctly. We could check it if we + // really needed to, but we'd have to disable support for 2.1.3. 
+ size_t loop_old_in = old_in; + while (1) + { + size_t r = iconv_adapter (iconv, (iconv_t) handle, + &inbuf, &inavail, + &outbuf, &outavail); + if (r == -1 && inavail == loop_old_in) + { + inavail -= 2; + if (inavail == 0) + break; + loop_old_in -= 2; + inbuf += 2; + } + else + break; + } + + if (temp_buffer != NULL) + _Jv_Free (temp_buffer); count += old_out - outavail; - return old_in - inavail; + return (old_in - inavail) / sizeof (jchar); #else /* HAVE_ICONV */ return -1; #endif /* HAVE_ICONV */ } + +jboolean +gnu::gcj::convert::IOConverter::iconv_init (void) +{ + // Some versions of iconv() always return their UCS-2 results in + // big-endian order, and they also require UCS-2 inputs to be in + // big-endian order. For instance, glibc 2.1.3 does this. If the + // UTF-8=>UCS-2 iconv converter has this feature, then we assume + // that all UCS-2 converters do. (This might not be the best + // heuristic, but it is all we've got.) + jboolean result = false; +#ifdef HAVE_ICONV + iconv_t handle = iconv_open ("UCS-2", "UTF-8"); + if (handle != (iconv_t) -1) + { + jchar c; + unsigned char in[3]; + char *inp, *outp; + size_t inc, outc, r; + + // This is the UTF-8 encoding of \ufeff. + in[0] = 0xef; + in[1] = 0xbb; + in[2] = 0xbf; + + inp = (char *) in; + inc = 3; + outp = (char *) &c; + outc = 2; + + r = iconv_adapter (iconv, handle, &inp, &inc, &outp, &outc); + // Conversion must be complete for us to use the result. + if (r != (size_t) -1 && inc == 0 && outc == 0) + result = (c != 0xfeff); + } +#endif /* HAVE_ICONV */ + return result; +} |