diff options
Diffstat (limited to 'ext/java/org/jruby/ext/psych/PsychParser.java')
-rw-r--r-- | ext/java/org/jruby/ext/psych/PsychParser.java | 192 |
1 files changed, 104 insertions, 88 deletions
diff --git a/ext/java/org/jruby/ext/psych/PsychParser.java b/ext/java/org/jruby/ext/psych/PsychParser.java index 9d116fa..c4280d4 100644 --- a/ext/java/org/jruby/ext/psych/PsychParser.java +++ b/ext/java/org/jruby/ext/psych/PsychParser.java @@ -33,6 +33,7 @@ import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; import java.nio.charset.MalformedInputException; +import java.util.Arrays; import java.util.Map; import org.jcodings.Encoding; @@ -54,13 +55,12 @@ import org.jruby.anno.JRubyMethod; import static org.jruby.ext.psych.PsychLibrary.YAMLEncoding.*; import org.jruby.runtime.Block; import org.jruby.runtime.Helpers; -import org.jruby.runtime.ObjectAllocator; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.runtime.callsite.CachingCallSite; +import org.jruby.runtime.callsite.FunctionalCachingCallSite; import org.jruby.util.IOInputStream; import org.jruby.util.io.EncodingUtils; -import org.jruby.util.log.Logger; -import org.jruby.util.log.LoggerFactory; import org.yaml.snakeyaml.DumperOptions; import org.yaml.snakeyaml.error.Mark; import org.yaml.snakeyaml.error.MarkedYAMLException; @@ -86,14 +86,22 @@ import org.jruby.util.ByteList; public class PsychParser extends RubyObject { - private static final Logger LOG = LoggerFactory.getLogger(PsychParser.class); - + public static final String JRUBY_CALL_SITES = "_jruby_call_sites"; + + private enum Call { + path, event_location, start_stream, start_document, end_document, alias, scalar, start_sequence, end_sequence, start_mapping, end_mapping, end_stream + } + + final CachingCallSite[] sites; + public static void initPsychParser(Ruby runtime, RubyModule psych) { - RubyClass psychParser = runtime.defineClassUnder("Parser", runtime.getObject(), new ObjectAllocator() { - public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - return new PsychParser(runtime, klazz); - } - }, psych); + RubyClass psychParser = runtime.defineClassUnder("Parser", runtime.getObject(), PsychParser::new, psych); + + CachingCallSite[] sites = + Arrays.stream(Call.values()) + .map((call) -> new FunctionalCachingCallSite(call.name())) + .toArray(CachingCallSite[]::new); + psychParser.setInternalVariable(JRUBY_CALL_SITES, sites); runtime.getLoadService().require("psych/syntax_error"); psychParser.defineConstant("ANY", runtime.newFixnum(YAML_ANY_ENCODING.ordinal())); @@ -106,15 +114,17 @@ public class PsychParser extends RubyObject { public PsychParser(Ruby runtime, RubyClass klass) { super(runtime, klass); + + this.sites = (CachingCallSite[]) klass.getInternalVariable(JRUBY_CALL_SITES); } - private IRubyObject stringOrNilFor(ThreadContext context, String value, boolean tainted) { + private IRubyObject stringOrNilFor(ThreadContext context, String value) { if (value == null) return context.nil; - return stringFor(context, value, tainted); + return stringFor(context, value); } - private RubyString stringFor(ThreadContext context, String value, boolean tainted) { + private RubyString stringFor(ThreadContext context, String value) { Ruby runtime = context.runtime; Encoding encoding = runtime.getDefaultInternalEncoding(); @@ -130,52 +140,35 @@ public class PsychParser extends RubyObject { ByteList bytes = new ByteList(value.getBytes(charset), encoding); RubyString string = RubyString.newString(runtime, bytes); - string.setTaint(tainted); - return string; } private StreamReader readerFor(ThreadContext context, IRubyObject yaml) { if (yaml instanceof RubyString) { - ByteList byteList = ((RubyString)yaml).getByteList(); - Encoding enc = byteList.getEncoding(); - - // if not unicode, transcode to UTF8 - if (!(enc instanceof UnicodeEncoding)) { - byteList = EncodingUtils.strConvEnc(context, byteList, enc, UTF8Encoding.INSTANCE); - enc = UTF8Encoding.INSTANCE; - } - - ByteArrayInputStream bais = new ByteArrayInputStream(byteList.getUnsafeBytes(), byteList.getBegin(), byteList.getRealSize()); - - Charset charset = enc.getCharset(); - - assert charset != null : "charset for encoding " + enc + " should not be null"; - - InputStreamReader isr = new InputStreamReader(bais, charset); - - return new StreamReader(isr); + return readerForString(context, (RubyString) yaml); } // fall back on IOInputStream, using default charset - if (yaml.respondsTo("read")) { - Charset charset = null; - if (yaml instanceof RubyIO) { + return readerForIO(context, yaml); + } + + private static StreamReader readerForIO(ThreadContext context, IRubyObject yaml) { + boolean isIO = yaml instanceof RubyIO; + if (isIO || yaml.respondsTo("read")) { + // default to UTF8 unless RubyIO has UTF16 as encoding + Charset charset = RubyEncoding.UTF8; + + if (isIO) { Encoding enc = ((RubyIO) yaml).getReadEncoding(); - charset = enc.getCharset(); // libyaml treats non-utf encodings as utf-8 and hopes for the best. - if (!(enc instanceof UTF8Encoding) && !(enc instanceof UTF16LEEncoding) && !(enc instanceof UTF16BEEncoding)) { - charset = UTF8Encoding.INSTANCE.getCharset(); + if (enc instanceof UTF16LEEncoding || enc instanceof UTF16BEEncoding) { + charset = enc.getCharset(); } } - if (charset == null) { - // If we can't get it from the IO or it doesn't have a charset, fall back on UTF-8 - charset = UTF8Encoding.INSTANCE.getCharset(); - } + CharsetDecoder decoder = charset.newDecoder(); decoder.onMalformedInput(CodingErrorAction.REPORT); - decoder.onMalformedInput(CodingErrorAction.REPORT); return new StreamReader(new InputStreamReader(new IOInputStream(yaml), decoder)); } else { @@ -185,59 +178,83 @@ public class PsychParser extends RubyObject { } } + private static StreamReader readerForString(ThreadContext context, RubyString string) { + ByteList byteList = string.getByteList(); + Encoding enc = byteList.getEncoding(); + + // if not unicode, transcode to UTF8 + if (!(enc instanceof UnicodeEncoding)) { + byteList = EncodingUtils.strConvEnc(context, byteList, enc, UTF8Encoding.INSTANCE); + enc = UTF8Encoding.INSTANCE; + } + + ByteArrayInputStream bais = new ByteArrayInputStream(byteList.getUnsafeBytes(), byteList.getBegin(), byteList.getRealSize()); + + Charset charset = enc.getCharset(); + + assert charset != null : "charset for encoding " + enc + " should not be null"; + + InputStreamReader isr = new InputStreamReader(bais, charset); + + return new StreamReader(isr); + } + @JRubyMethod(name = "_native_parse") public IRubyObject parse(ThreadContext context, IRubyObject handler, IRubyObject yaml, IRubyObject path) { Ruby runtime = context.runtime; - boolean tainted = yaml.isTaint() || yaml instanceof RubyIO; try { parser = new ParserImpl(readerFor(context, yaml)); if (path.isNil() && yaml.respondsTo("path")) { - path = yaml.callMethod(context, "path"); + path = sites[Call.path.ordinal()].call(context, this, yaml); } while (true) { event = parser.getEvent(); - IRubyObject start_line = runtime.newFixnum(event.getStartMark().getLine()); - IRubyObject start_column = runtime.newFixnum(event.getStartMark().getColumn()); - IRubyObject end_line = runtime.newFixnum(event.getEndMark().getLine()); - IRubyObject end_column = runtime.newFixnum(event.getEndMark().getColumn()); - invoke(context, handler, "event_location", start_line, start_column, end_line, end_column); + Mark start = event.getStartMark(); + IRubyObject start_line = runtime.newFixnum(start.getLine()); + IRubyObject start_column = runtime.newFixnum(start.getColumn()); + + Mark end = event.getEndMark(); + IRubyObject end_line = runtime.newFixnum(end.getLine()); + IRubyObject end_column = runtime.newFixnum(end.getColumn()); + + sites[Call.event_location.ordinal()].call(context, this, handler, start_line, start_column, end_line, end_column); // FIXME: Event should expose a getID, so it can be switched if (event.is(ID.StreamStart)) { - invoke(context, handler, "start_stream", runtime.newFixnum(YAML_ANY_ENCODING.ordinal())); + sites[Call.start_stream.ordinal()].call(context, this, handler, runtime.newFixnum(YAML_ANY_ENCODING.ordinal())); } else if (event.is(ID.DocumentStart)) { - handleDocumentStart(context, (DocumentStartEvent) event, tainted, handler); + handleDocumentStart(context, (DocumentStartEvent) event, handler); } else if (event.is(ID.DocumentEnd)) { IRubyObject notExplicit = runtime.newBoolean(!((DocumentEndEvent) event).getExplicit()); - - invoke(context, handler, "end_document", notExplicit); + + sites[Call.end_document.ordinal()].call(context, this, handler, notExplicit); } else if (event.is(ID.Alias)) { - IRubyObject alias = stringOrNilFor(context, ((AliasEvent)event).getAnchor(), tainted); + IRubyObject alias = stringOrNilFor(context, ((AliasEvent)event).getAnchor()); - invoke(context, handler, "alias", alias); + sites[Call.alias.ordinal()].call(context, this, handler, alias); } else if (event.is(ID.Scalar)) { - handleScalar(context, (ScalarEvent) event, tainted, handler); + handleScalar(context, (ScalarEvent) event, handler); } else if (event.is(ID.SequenceStart)) { - handleSequenceStart(context,(SequenceStartEvent) event, tainted, handler); + handleSequenceStart(context, (SequenceStartEvent) event, handler); } else if (event.is(ID.SequenceEnd)) { - invoke(context, handler, "end_sequence"); + sites[Call.end_sequence.ordinal()].call(context, this, handler); } else if (event.is(ID.MappingStart)) { - handleMappingStart(context, (MappingStartEvent) event, tainted, handler); + handleMappingStart(context, (MappingStartEvent) event, handler); } else if (event.is(ID.MappingEnd)) { - invoke(context, handler, "end_mapping"); + sites[Call.end_mapping.ordinal()].call(context, this, handler); } else if (event.is(ID.StreamEnd)) { - invoke(context, handler, "end_stream"); + sites[Call.end_stream.ordinal()].call(context, this, handler); break; } } } catch (ParserException pe) { parser = null; - raiseParserException(context, yaml, pe, path); + raiseParserException(context, pe, path); } catch (ScannerException se) { parser = null; @@ -245,18 +262,18 @@ public class PsychParser extends RubyObject { if (se.getProblemMark() != null) { message.append(se.getProblemMark().toString()); } - raiseParserException(context, yaml, se, path); + raiseParserException(context, se, path); } catch (ReaderException re) { parser = null; - raiseParserException(context, yaml, re, path); + raiseParserException(context, re, path); } catch (YAMLException ye) { Throwable cause = ye.getCause(); if (cause instanceof MalformedInputException) { // failure due to improperly encoded input - raiseParserException(context, yaml, (MalformedInputException) cause, path); + raiseParserException(context, (MalformedInputException) cause, path); } throw ye; @@ -269,7 +286,7 @@ public class PsychParser extends RubyObject { return this; } - private void handleDocumentStart(ThreadContext context, DocumentStartEvent dse, boolean tainted, IRubyObject handler) { + private void handleDocumentStart(ThreadContext context, DocumentStartEvent dse, IRubyObject handler) { Ruby runtime = context.runtime; DumperOptions.Version _version = dse.getVersion(); IRubyObject version = _version == null ? @@ -280,8 +297,8 @@ public class PsychParser extends RubyObject { RubyArray tags = RubyArray.newArray(runtime); if (tagsMap != null && tagsMap.size() > 0) { for (Map.Entry<String, String> tag : tagsMap.entrySet()) { - IRubyObject key = stringFor(context, tag.getKey(), tainted); - IRubyObject value = stringFor(context, tag.getValue(), tainted); + IRubyObject key = stringFor(context, tag.getKey()); + IRubyObject value = stringFor(context, tag.getValue()); tags.append(RubyArray.newArray(runtime, key, value)); } @@ -291,41 +308,41 @@ public class PsychParser extends RubyObject { invoke(context, handler, "start_document", version, tags, notExplicit); } - private void handleMappingStart(ThreadContext context, MappingStartEvent mse, boolean tainted, IRubyObject handler) { + private void handleMappingStart(ThreadContext context, MappingStartEvent mse, IRubyObject handler) { Ruby runtime = context.runtime; - IRubyObject anchor = stringOrNilFor(context, mse.getAnchor(), tainted); - IRubyObject tag = stringOrNilFor(context, mse.getTag(), tainted); + IRubyObject anchor = stringOrNilFor(context, mse.getAnchor()); + IRubyObject tag = stringOrNilFor(context, mse.getTag()); IRubyObject implicit = runtime.newBoolean(mse.getImplicit()); IRubyObject style = runtime.newFixnum(translateFlowStyle(mse.getFlowStyle())); - invoke(context, handler, "start_mapping", anchor, tag, implicit, style); + sites[Call.start_mapping.ordinal()].call(context, this, handler, anchor, tag, implicit, style); } - private void handleScalar(ThreadContext context, ScalarEvent se, boolean tainted, IRubyObject handler) { + private void handleScalar(ThreadContext context, ScalarEvent se, IRubyObject handler) { Ruby runtime = context.runtime; - IRubyObject anchor = stringOrNilFor(context, se.getAnchor(), tainted); - IRubyObject tag = stringOrNilFor(context, se.getTag(), tainted); + IRubyObject anchor = stringOrNilFor(context, se.getAnchor()); + IRubyObject tag = stringOrNilFor(context, se.getTag()); IRubyObject plain_implicit = runtime.newBoolean(se.getImplicit().canOmitTagInPlainScalar()); IRubyObject quoted_implicit = runtime.newBoolean(se.getImplicit().canOmitTagInNonPlainScalar()); IRubyObject style = runtime.newFixnum(translateStyle(se.getScalarStyle())); - IRubyObject val = stringFor(context, se.getValue(), tainted); + IRubyObject val = stringFor(context, se.getValue()); - invoke(context, handler, "scalar", val, anchor, tag, plain_implicit, + sites[Call.scalar.ordinal()].call(context, this, handler, val, anchor, tag, plain_implicit, quoted_implicit, style); } - private void handleSequenceStart(ThreadContext context, SequenceStartEvent sse, boolean tainted, IRubyObject handler) { + private void handleSequenceStart(ThreadContext context, SequenceStartEvent sse, IRubyObject handler) { Ruby runtime = context.runtime; - IRubyObject anchor = stringOrNilFor(context, sse.getAnchor(), tainted); - IRubyObject tag = stringOrNilFor(context, sse.getTag(), tainted); + IRubyObject anchor = stringOrNilFor(context, sse.getAnchor()); + IRubyObject tag = stringOrNilFor(context, sse.getTag()); IRubyObject implicit = runtime.newBoolean(sse.getImplicit()); IRubyObject style = runtime.newFixnum(translateFlowStyle(sse.getFlowStyle())); - invoke(context, handler, "start_sequence", anchor, tag, implicit, style); + sites[Call.start_sequence.ordinal()].call(context, this, handler, anchor, tag, implicit, style); } - private static void raiseParserException(ThreadContext context, IRubyObject yaml, ReaderException re, IRubyObject rbPath) { + private static void raiseParserException(ThreadContext context, ReaderException re, IRubyObject rbPath) { Ruby runtime; RubyClass se; IRubyObject exception; @@ -347,7 +364,7 @@ public class PsychParser extends RubyObject { RubyKernel.raise(context, runtime.getKernel(), new IRubyObject[] { exception }, Block.NULL_BLOCK); } - private static void raiseParserException(ThreadContext context, IRubyObject yaml, MarkedYAMLException mye, IRubyObject rbPath) { + private static void raiseParserException(ThreadContext context, MarkedYAMLException mye, IRubyObject rbPath) { Ruby runtime; Mark mark; RubyClass se; @@ -372,9 +389,8 @@ public class PsychParser extends RubyObject { RubyKernel.raise(context, runtime.getKernel(), new IRubyObject[] { exception }, Block.NULL_BLOCK); } - private static void raiseParserException(ThreadContext context, IRubyObject yaml, MalformedInputException mie, IRubyObject rbPath) { - Ruby runtime; - Mark mark; + private static void raiseParserException(ThreadContext context, MalformedInputException mie, IRubyObject rbPath) { + Ruby runtime;; RubyClass se; IRubyObject exception; |