diff options
author | Charles Oliver Nutter <headius@headius.com> | 2013-08-28 14:23:33 -0500 |
---|---|---|
committer | Charles Oliver Nutter <headius@headius.com> | 2013-08-28 14:23:33 -0500 |
commit | ff76308eb88a58d84878bc547c721c07cc436b66 (patch) | |
tree | ee4dc32ae4e27a50d152575b0f8da1ec0457388e | |
parent | b260422d78a64a051a250613e71aa1971511695a (diff) | |
download | psych-atambo-native_jruby.tar.gz |
Properly negotiate encoding from string or IO.atambo-native_jruby
-rw-r--r-- | ext/java/PsychParser.java | 117 |
1 files changed, 97 insertions, 20 deletions
diff --git a/ext/java/PsychParser.java b/ext/java/PsychParser.java index 8de5e3a..ba084a7 100644 --- a/ext/java/PsychParser.java +++ b/ext/java/PsychParser.java @@ -36,11 +36,16 @@ import java.nio.charset.Charset; import java.util.Map; import org.jcodings.Encoding; +import org.jcodings.specific.ASCIIEncoding; +import org.jcodings.specific.USASCIIEncoding; +import org.jcodings.specific.UTF16BEEncoding; +import org.jcodings.specific.UTF16LEEncoding; import org.jcodings.specific.UTF8Encoding; import org.jruby.Ruby; import org.jruby.RubyArray; import org.jruby.RubyClass; import org.jruby.RubyEncoding; +import org.jruby.RubyFixnum; import org.jruby.RubyIO; import org.jruby.RubyKernel; import org.jruby.RubyModule; @@ -53,6 +58,7 @@ import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.util.ByteList; import org.jruby.util.IOInputStream; +import org.jruby.util.io.EncodingUtils; import org.jruby.util.log.Logger; import org.jruby.util.log.LoggerFactory; import org.jruby.util.unsafe.UnsafeHolder; @@ -133,35 +139,84 @@ public class PsychParser extends RubyObject { } private StreamReader readerFor(ThreadContext context, IRubyObject yaml) { - Ruby runtime = context.runtime; - - if (yaml instanceof RubyString) { - ByteList byteList = ((RubyString) yaml).getByteList(); + if (yaml.respondsTo("read")) { + Encoding encoding = transcodeIO(context, yaml); + Charset charset = encoding.getCharset(); + + return new StreamReader(new InputStreamReader(new IOInputStream(yaml), charset)); + } else { + RubyString str = yaml.convertToString(); + + Encoding encoding = transcodeString(context, str); + Charset charset = encoding.getCharset(); + + ByteList byteList = str.getByteList(); ByteArrayInputStream bais = new ByteArrayInputStream(byteList.getUnsafeBytes(), byteList.getBegin(), byteList.getRealSize()); - Charset charset = byteList.getEncoding().getCharset(); - if (charset == null) - charset = Charset.defaultCharset(); - InputStreamReader isr = new InputStreamReader(bais, charset); return new StreamReader(isr); } - - if (yaml instanceof RubyIO) { - RubyIO io = (RubyIO)yaml; - - InputStreamReader isr = new InputStreamReader(new IOInputStream(io), io.getReadEncoding().getCharset()); - return new StreamReader(isr); - } - - // fall back on IOInputStream, using default charset - if (yaml.respondsTo("read")) { - return new StreamReader(new InputStreamReader(new IOInputStream(yaml), Charset.defaultCharset())); + } + + private Encoding transcodeString(ThreadContext context, RubyString src) { + Encoding sourceEncoding = src.getEncoding(); + + if (sourceEncoding == UTF8Encoding.INSTANCE) { + return UTF8Encoding.INSTANCE; + } + + if (sourceEncoding == UTF16LEEncoding.INSTANCE) { + return UTF16LEEncoding.INSTANCE; + } + + if (sourceEncoding == UTF16BEEncoding.INSTANCE) { + return UTF16BEEncoding.INSTANCE; + } + + src.replace(src.encode(context, context.runtime.getEncodingService().convertEncodingToRubyEncoding(UTF8Encoding.INSTANCE))); + return UTF8Encoding.INSTANCE; + } + + private Encoding transcodeIO(ThreadContext context, IRubyObject src) { + IRubyObject _externalEncoding; + Encoding externalEncoding; + + _externalEncoding = src.callMethod(context, "external_encoding"); + + if (_externalEncoding.isNil()) { + externalEncoding = ASCIIEncoding.INSTANCE; } else { - throw runtime.newTypeError(yaml, runtime.getIO()); + externalEncoding = context.runtime.getEncodingService().getEncodingFromObject(_externalEncoding); + } + + if (externalEncoding == USASCIIEncoding.INSTANCE) { + externalEncoding = UTF8Encoding.INSTANCE; } + + if (externalEncoding == UTF8Encoding.INSTANCE) { + return UTF8Encoding.INSTANCE; + } + + if (externalEncoding == UTF16LEEncoding.INSTANCE) { + return UTF16LEEncoding.INSTANCE; + } + + if (externalEncoding == UTF16BEEncoding.INSTANCE) { + return UTF16BEEncoding.INSTANCE; + } + + if (externalEncoding == ASCIIEncoding.INSTANCE) { + // we have no way to auto-detect the stream's encoding, so use UTF-8 + return UTF8Encoding.INSTANCE; + } + + // we have no way to auto-detect the stream's encoding, so raise error + raiseParserException(context, "unsupported YAML encoding: " + externalEncoding.toString()); + + // not reached + throw null; } @JRubyMethod @@ -289,6 +344,28 @@ public class PsychParser extends RubyObject { invoke(context, handler, "start_sequence", anchor, tag, implicit, style); } + + private static void raiseParserException(ThreadContext context, String error) { + Ruby runtime; + RubyClass se; + IRubyObject exception; + + runtime = context.runtime; + se = (RubyClass) runtime.getModule("Psych").getConstant("SyntaxError"); + + exception = se.newInstance(context, + new IRubyObject[] { + context.nil, + RubyFixnum.zero(runtime), + RubyFixnum.zero(runtime), + RubyFixnum.zero(runtime), + context.nil, + runtime.newString(error) + }, + Block.NULL_BLOCK); + + RubyKernel.raise(context, runtime.getKernel(), new IRubyObject[] { exception }, Block.NULL_BLOCK); + } private static void raiseParserException(ThreadContext context, IRubyObject yaml, ReaderException re, IRubyObject rbPath) { |