summaryrefslogtreecommitdiff
path: root/ext/json/ext/parser/parser.rl
diff options
context:
space:
mode:
Diffstat (limited to 'ext/json/ext/parser/parser.rl')
-rw-r--r--ext/json/ext/parser/parser.rl85
1 files changed, 68 insertions, 17 deletions
diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl
index 384ab9f..7de7bb1 100644
--- a/ext/json/ext/parser/parser.rl
+++ b/ext/json/ext/parser/parser.rl
@@ -19,15 +19,19 @@
#ifdef HAVE_RUBY_ENCODING_H
#include "ruby/encoding.h"
#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding())
+static VALUE mEncoding_ASCII_8BIT, mEncoding_UTF_8, mEncoding_UTF_16BE,
+ mEncoding_UTF_16LE, mEncoding_UTF_32BE, mEncoding_UTF_32LE;
+static ID i_encoding, i_encode, i_encode_bang, i_force_encoding;
#else
#define FORCE_UTF8(obj)
+static ID i_iconv;
#endif
static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
static VALUE CNaN, CInfinity, CMinusInfinity;
static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
- i_chr, i_max_nesting, i_allow_nan, i_object_class, i_array_class;
+ i_chr, i_max_nesting, i_allow_nan, i_object_class, i_array_class;
#define MinusInfinity "-Infinity"
@@ -484,6 +488,54 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
*
*/
+inline static VALUE convert_encoding(VALUE source)
+{
+ char *ptr = RSTRING_PTR(source);
+ long len = RSTRING_LEN(source);
+ if (len < 2) {
+ rb_raise(eParserError, "A JSON text must at least contain two octets!");
+ }
+#ifdef HAVE_RUBY_ENCODING_H
+ {
+ VALUE encoding = rb_funcall(source, i_encoding, 0);
+ if (encoding == mEncoding_ASCII_8BIT) {
+ if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
+ source = rb_str_dup(source);
+ rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_32BE);
+ source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8);
+ } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
+ source = rb_str_dup(source);
+ rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_16BE);
+ source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8);
+ } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
+ source = rb_str_dup(source);
+ rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_32LE);
+ source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8);
+ } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
+ source = rb_str_dup(source);
+ rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_16LE);
+ source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8);
+ } else {
+ source = rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_8);
+ }
+ } else {
+ source = rb_funcall(source, i_encode, 1, mEncoding_UTF_8);
+ }
+ }
+#else
+ if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
+ source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32be"), source);
+ } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
+ source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16be"), source);
+ } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
+ source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32le"), source);
+ } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
+ source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16le"), source);
+ }
+#endif
+ return source;
+}
+
/*
* call-seq: new(source, opts => {})
*
@@ -514,12 +566,9 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
VALUE source, opts;
GET_STRUCT;
rb_scan_args(argc, argv, "11", &source, &opts);
- source = StringValue(source);
+ source = convert_encoding(StringValue(source));
ptr = RSTRING_PTR(source);
len = RSTRING_LEN(source);
- if (len < 2) {
- rb_raise(eParserError, "A JSON text must at least contain two octets!");
- }
if (!NIL_P(opts)) {
opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
if (NIL_P(opts)) {
@@ -576,18 +625,6 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
json->array_class = Qnil;
}
json->current_nesting = 0;
- /*
- Convert these?
- if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
- rb_raise(eParserError, "Only UTF8 octet streams are supported atm!");
- } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
- rb_raise(eParserError, "Only UTF8 octet streams are supported atm!");
- } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
- rb_raise(eParserError, "Only UTF8 octet streams are supported atm!");
- } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
- rb_raise(eParserError, "Only UTF8 octet streams are supported atm!");
- }
- */
json->len = len;
json->source = ptr;
json->Vsource = source;
@@ -683,4 +720,18 @@ void Init_parser()
i_allow_nan = rb_intern("allow_nan");
i_object_class = rb_intern("object_class");
i_array_class = rb_intern("array_class");
+#ifdef HAVE_RUBY_ENCODING_H
+ mEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
+ mEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be"));
+ mEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le"));
+ mEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be"));
+ mEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le"));
+ mEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit"));
+ i_encoding = rb_intern("encoding");
+ i_encode = rb_intern("encode");
+ i_encode_bang = rb_intern("encode!");
+ i_force_encoding = rb_intern("force_encoding");
+#else
+ i_iconv = rb_intern("iconv");
+#endif
}