summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Florian Frank <flori@ping.de>, 2009-10-20 13:04:24 +0200
committer: Florian Frank <flori@ping.de>, 2009-10-26 22:56:27 +0100
commit: 41ae3d70d6dd141759eb6f3fddf460b327a90796 (patch)
tree: 54431a12877a97a50f8fb077a03b6e1cdecb9f12
parent: 3a13313e9d231e7e3f99101812825dbe3d01d13a (diff)
download: json-41ae3d70d6dd141759eb6f3fddf460b327a90796.tar.gz
some optimisations and cleanup
-rw-r--r-- ext/json/ext/generator/extconf.rb 7
-rw-r--r-- ext/json/ext/generator/generator.c 24
-rw-r--r-- ext/json/ext/generator/unicode.c 104
-rw-r--r-- ext/json/ext/generator/unicode.h 14
-rw-r--r-- ext/json/ext/parser/extconf.rb 7
-rw-r--r-- ext/json/ext/parser/parser.c 41
-rw-r--r-- ext/json/ext/parser/parser.rl 41
-rw-r--r-- ext/json/ext/parser/unicode.c 88
-rw-r--r-- ext/json/ext/parser/unicode.h 8
9 files changed, 161 insertions, 173 deletions
diff --git a/ext/json/ext/generator/extconf.rb b/ext/json/ext/generator/extconf.rb
index 797b566..a0760e2 100644
--- a/ext/json/ext/generator/extconf.rb
+++ b/ext/json/ext/generator/extconf.rb
@@ -1,9 +1,12 @@
require 'mkmf'
require 'rbconfig'
+unless $CFLAGS.gsub!(/ -O[\dsz]?/, ' -O3')
+ $CFLAGS << ' -O3'
+end
if CONFIG['CC'] =~ /gcc/
- $CFLAGS += ' -Wall'
- #$CFLAGS += ' -O0 -ggdb'
+ $CFLAGS << ' -Wall'
+ #$CFLAGS.gsub!(/ -O[\dsz]?/, ' -O0 -ggdb')
end
have_header("ruby/st.h") || have_header("st.h")
diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c
index 558f28d..a40cbb4 100644
--- a/ext/json/ext/generator/generator.c
+++ b/ext/json/ext/generator/generator.c
@@ -24,7 +24,7 @@
#ifdef HAVE_RUBY_ENCODING_H
#include "ruby/encoding.h"
#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding())
-static VALUE mEncoding_UTF_8;
+static VALUE CEncoding_UTF_8;
static ID i_encoding, i_encode;
#else
#define FORCE_UTF8(obj)
@@ -154,7 +154,7 @@ static int hash_to_json_i(VALUE key, VALUE value, VALUE buf)
/*
* call-seq: to_json(state = nil, depth = 0)
*
- * Returns a JSON string containing a JSON object, that is unparsed from
+ * Returns a JSON string containing a JSON object, that is generated from
* this Hash instance.
* _state_ is a JSON::State object, that can also be used to configure the
* produced JSON string output further.
@@ -260,7 +260,7 @@ inline static VALUE mArray_json_transfrom(VALUE self, VALUE Vstate, VALUE Vdepth
/*
* call-seq: to_json(state = nil, depth = 0)
*
- * Returns a JSON string containing a JSON array, that is unparsed from
+ * Returns a JSON string containing a JSON array, that is generated from
* this Array instance.
* _state_ is a JSON::State object, that can also be used to configure the
* produced JSON string output further.
@@ -360,14 +360,14 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
VALUE result = rb_str_buf_new(RSTRING_LEN(self));
rb_str_buf_cat2(result, "\"");
#ifdef HAVE_RUBY_ENCODING_H
- if (rb_funcall(self, i_encoding, 0) == mEncoding_UTF_8) {
- JSON_convert_UTF8_to_JSON(result, self, strictConversion);
+ if (rb_funcall(self, i_encoding, 0) == CEncoding_UTF_8) {
+ JSON_convert_UTF8_to_JSON(result, self);
} else {
- VALUE string = rb_funcall(self, i_encode, 1, mEncoding_UTF_8);
- JSON_convert_UTF8_to_JSON(result, string, strictConversion);
+ VALUE string = rb_funcall(self, i_encode, 1, CEncoding_UTF_8);
+ JSON_convert_UTF8_to_JSON(result, string);
}
#else
- JSON_convert_UTF8_to_JSON(result, self, strictConversion);
+ JSON_convert_UTF8_to_JSON(result, self);
#endif
rb_str_buf_cat2(result, "\"");
FORCE_UTF8(result);
@@ -378,7 +378,7 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
* call-seq: to_json_raw_object()
*
* This method creates a raw object hash, that can be nested into
- * other data structures and will be unparsed as a raw string. This
+ * other data structures and will be generated as a raw string. This
* method should be used, if you want to convert raw strings to JSON
* instead of UTF-8 strings, e. g. binary data.
*/
@@ -856,17 +856,19 @@ static VALUE cState_forget(VALUE self, VALUE object)
void Init_generator()
{
rb_require("json/common");
+
mJSON = rb_define_module("JSON");
mExt = rb_define_module_under(mJSON, "Ext");
mGenerator = rb_define_module_under(mExt, "Generator");
+
eGeneratorError = rb_path2class("JSON::GeneratorError");
eCircularDatastructure = rb_path2class("JSON::CircularDatastructure");
eNestingError = rb_path2class("JSON::NestingError");
+
cState = rb_define_class_under(mGenerator, "State", rb_cObject);
rb_define_alloc_func(cState, cState_s_allocate);
rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1);
rb_define_method(cState, "initialize", cState_initialize, -1);
-
rb_define_method(cState, "indent", cState_indent, 0);
rb_define_method(cState, "indent=", cState_indent_set, 1);
rb_define_method(cState, "space", cState_space, 0);
@@ -928,7 +930,7 @@ void Init_generator()
i_create_id = rb_intern("create_id");
i_extend = rb_intern("extend");
#ifdef HAVE_RUBY_ENCODING_H
- mEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
+ CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
i_encoding = rb_intern("encoding");
i_encode = rb_intern("encode");
#endif
diff --git a/ext/json/ext/generator/unicode.c b/ext/json/ext/generator/unicode.c
index 3ddfbe0..1d0b675 100644
--- a/ext/json/ext/generator/unicode.c
+++ b/ext/json/ext/generator/unicode.c
@@ -1,9 +1,5 @@
#include "unicode.h"
-#define unicode_escape(buffer, character) \
- snprintf(buf, 7, "\\u%04x", (unsigned int) (character)); \
- rb_str_buf_cat(buffer, buf, 6);
-
/*
* Copyright 2001-2004 Unicode, Inc.
*
@@ -53,15 +49,6 @@ static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
/*
- * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
- * into the first byte, depending on how many bytes follow. There are
- * as many entries in this table as there are UTF-8 sequence types.
- * (I.e., one byte sequence, two byte... etc.). Remember that sequencs
- * for *legal* UTF-8 will be 4 or fewer bytes total.
- */
-static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
-
-/*
* Utility routine to tell whether a sequence of bytes is legal UTF-8.
* This must be called with the length pre-determined by the first byte.
* If not calling this from ConvertUTF8to*, then the length can be set by:
@@ -98,9 +85,21 @@ inline static unsigned char isLegalUTF8(const UTF8 *source, int length)
return 1;
}
-void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags)
+inline static void unicode_escape(VALUE buffer, UTF16 character)
+{
+ const char *digits = "0123456789abcdef";
+ char buf[7] = { '\\', 'u' };
+
+ buf[6] = 0;
+ buf[2] = digits[character >> 12];
+ buf[3] = digits[(character >> 8) & 0xf];
+ buf[4] = digits[(character >> 4) & 0xf];
+ buf[5] = digits[character & 0xf];
+ rb_str_buf_cat(buffer, buf, 6);
+}
+
+inline void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string)
{
- char buf[7];
const UTF8* source = (UTF8 *) RSTRING_PTR(string);
const UTF8* sourceEnd = source + RSTRING_LEN(string);
@@ -131,45 +130,54 @@ void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags
if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
/* UTF-16 surrogate values are illegal in UTF-32 */
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
- if (flags == strictConversion) {
- source -= (extraBytesToRead+1); /* return to the illegal value itself */
- rb_raise(rb_path2class("JSON::GeneratorError"),
+#if UNI_STRICT_CONVERSION
+ source -= (extraBytesToRead+1); /* return to the illegal value itself */
+ rb_raise(rb_path2class("JSON::GeneratorError"),
"source sequence is illegal/malformed utf-8");
- } else {
- unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
- }
+#else
+ unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
+#endif
} else {
/* normal case */
- if (ch == '"') {
- rb_str_buf_cat2(buffer, "\\\"");
- } else if (ch == '\\') {
- rb_str_buf_cat2(buffer, "\\\\");
- } else if (ch >= 0x20 && ch <= 0x7f) {
- rb_str_buf_cat(buffer, (char *) source - 1, 1);
- } else if (ch == '\n') {
- rb_str_buf_cat2(buffer, "\\n");
- } else if (ch == '\r') {
- rb_str_buf_cat2(buffer, "\\r");
- } else if (ch == '\t') {
- rb_str_buf_cat2(buffer, "\\t");
- } else if (ch == '\f') {
- rb_str_buf_cat2(buffer, "\\f");
- } else if (ch == '\b') {
- rb_str_buf_cat2(buffer, "\\b");
- } else if (ch < 0x20) {
- unicode_escape(buffer, (UTF16) ch);
- } else {
- unicode_escape(buffer, (UTF16) ch);
+ switch(ch) {
+ case '\n':
+ rb_str_buf_cat2(buffer, "\\n");
+ break;
+ case '\r':
+ rb_str_buf_cat2(buffer, "\\r");
+ break;
+ case '\\':
+ rb_str_buf_cat2(buffer, "\\\\");
+ break;
+ case '"':
+ rb_str_buf_cat2(buffer, "\\\"");
+ break;
+ case '\t':
+ rb_str_buf_cat2(buffer, "\\t");
+ break;
+ case '\f':
+ rb_str_buf_cat2(buffer, "\\f");
+ break;
+ case '\b':
+ rb_str_buf_cat2(buffer, "\\b");
+ break;
+ default:
+ if (ch >= 0x20 && ch <= 0x7f) {
+ rb_str_buf_cat(buffer, (char *) source - 1, 1);
+ } else {
+ unicode_escape(buffer, (UTF16) ch);
+ }
+ break;
}
}
} else if (ch > UNI_MAX_UTF16) {
- if (flags == strictConversion) {
- source -= (extraBytesToRead+1); /* return to the start */
- rb_raise(rb_path2class("JSON::GeneratorError"),
- "source sequence is illegal/malformed utf8");
- } else {
- unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
- }
+#if UNI_STRICT_CONVERSION
+ source -= (extraBytesToRead+1); /* return to the start */
+ rb_raise(rb_path2class("JSON::GeneratorError"),
+ "source sequence is illegal/malformed utf8");
+#else
+ unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
+#endif
} else {
/* target is a character in range 0xFFFF - 0x10FFFF. */
ch -= halfBase;
diff --git a/ext/json/ext/generator/unicode.h b/ext/json/ext/generator/unicode.h
index 841474b..77b29a4 100644
--- a/ext/json/ext/generator/unicode.h
+++ b/ext/json/ext/generator/unicode.h
@@ -3,17 +3,7 @@
#ifndef _GENERATOR_UNICODE_H_
#define _GENERATOR_UNICODE_H_
-typedef enum {
- conversionOK = 0, /* conversion successful */
- sourceExhausted, /* partial character in source, but hit end */
- targetExhausted, /* insuff. room in target for conversion */
- sourceIllegal /* source sequence is illegal/malformed */
-} ConversionResult;
-
-typedef enum {
- strictConversion = 0,
- lenientConversion
-} ConversionFlags;
+#define UNI_STRICT_CONVERSION 1
typedef unsigned long UTF32; /* at least 32 bits */
typedef unsigned short UTF16; /* at least 16 bits */
@@ -35,7 +25,7 @@ static const int halfShift = 10; /* used for shifting by 10 bits */
static const UTF32 halfBase = 0x0010000UL;
static const UTF32 halfMask = 0x3FFUL;
-void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags);
+inline void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string);
#ifndef RARRAY_PTR
#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr
diff --git a/ext/json/ext/parser/extconf.rb b/ext/json/ext/parser/extconf.rb
index 6226394..9662e9a 100644
--- a/ext/json/ext/parser/extconf.rb
+++ b/ext/json/ext/parser/extconf.rb
@@ -1,9 +1,12 @@
require 'mkmf'
require 'rbconfig'
+unless $CFLAGS.gsub!(/ -O[\dsz]?/, ' -O3')
+ $CFLAGS << ' -O3'
+end
if CONFIG['CC'] =~ /gcc/
- $CFLAGS += ' -Wall'
- #$CFLAGS += ' -O0 -ggdb'
+ $CFLAGS << ' -Wall'
+ #$CFLAGS.gsub!(/ -O[\dsz]?/, ' -O0 -ggdb')
end
have_header("ruby/st.h") || have_header("st.h")
diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c
index 1781381..d2dfe52 100644
--- a/ext/json/ext/parser/parser.c
+++ b/ext/json/ext/parser/parser.c
@@ -1262,7 +1262,6 @@ static VALUE json_string_unescape(char *p, char *pe)
while (p < pe) {
if (*p == '\\') {
p++;
- if (p >= pe) return Qnil; /* raise an exception later, \ at end */
switch (*p) {
case '"':
case '\\':
@@ -1293,7 +1292,7 @@ static VALUE json_string_unescape(char *p, char *pe)
if (p > pe - 4) {
return Qnil;
} else {
- p = JSON_convert_UTF16_to_UTF8(result, p, pe, strictConversion);
+ p = JSON_convert_UTF16_to_UTF8(result, p, pe);
}
break;
default:
@@ -1312,7 +1311,7 @@ static VALUE json_string_unescape(char *p, char *pe)
}
-#line 1316 "parser.c"
+#line 1315 "parser.c"
static const int JSON_string_start = 1;
static const int JSON_string_first_final = 8;
static const int JSON_string_error = 0;
@@ -1320,7 +1319,7 @@ static const int JSON_string_error = 0;
static const int JSON_string_en_main = 1;
-#line 433 "parser.rl"
+#line 432 "parser.rl"
static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
@@ -1329,15 +1328,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
*result = rb_str_new("", 0);
-#line 1333 "parser.c"
+#line 1332 "parser.c"
{
cs = JSON_string_start;
}
-#line 441 "parser.rl"
+#line 440 "parser.rl"
json->memo = p;
-#line 1341 "parser.c"
+#line 1340 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -1362,7 +1361,7 @@ case 2:
goto st0;
goto st2;
tr2:
-#line 419 "parser.rl"
+#line 418 "parser.rl"
{
*result = json_string_unescape(json->memo + 1, p);
if (NIL_P(*result)) {
@@ -1373,14 +1372,14 @@ tr2:
{p = (( p + 1))-1;}
}
}
-#line 430 "parser.rl"
+#line 429 "parser.rl"
{ p--; {p++; cs = 8; goto _out;} }
goto st8;
st8:
if ( ++p == pe )
goto _test_eof8;
case 8:
-#line 1384 "parser.c"
+#line 1383 "parser.c"
goto st0;
st3:
if ( ++p == pe )
@@ -1456,7 +1455,7 @@ case 7:
_out: {}
}
-#line 443 "parser.rl"
+#line 442 "parser.rl"
if (cs >= JSON_string_first_final) {
return p + 1;
@@ -1467,7 +1466,7 @@ case 7:
-#line 1471 "parser.c"
+#line 1470 "parser.c"
static const int JSON_start = 1;
static const int JSON_first_final = 10;
static const int JSON_error = 0;
@@ -1475,7 +1474,7 @@ static const int JSON_error = 0;
static const int JSON_en_main = 1;
-#line 477 "parser.rl"
+#line 476 "parser.rl"
/*
@@ -1518,7 +1517,7 @@ inline static VALUE convert_encoding(VALUE source)
rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_16LE);
source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8);
} else {
- source = rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_8);
+ FORCE_UTF8(source);
}
} else {
source = rb_funcall(source, i_encode, 1, mEncoding_UTF_8);
@@ -1647,16 +1646,16 @@ static VALUE cParser_parse(VALUE self)
GET_STRUCT;
-#line 1651 "parser.c"
+#line 1650 "parser.c"
{
cs = JSON_start;
}
-#line 648 "parser.rl"
+#line 647 "parser.rl"
p = json->source;
pe = p + json->len;
-#line 1660 "parser.c"
+#line 1659 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -1712,7 +1711,7 @@ case 5:
goto st1;
goto st5;
tr3:
-#line 466 "parser.rl"
+#line 465 "parser.rl"
{
char *np;
json->current_nesting = 1;
@@ -1721,7 +1720,7 @@ tr3:
}
goto st10;
tr4:
-#line 459 "parser.rl"
+#line 458 "parser.rl"
{
char *np;
json->current_nesting = 1;
@@ -1733,7 +1732,7 @@ st10:
if ( ++p == pe )
goto _test_eof10;
case 10:
-#line 1737 "parser.c"
+#line 1736 "parser.c"
switch( (*p) ) {
case 13: goto st10;
case 32: goto st10;
@@ -1790,7 +1789,7 @@ case 9:
_out: {}
}
-#line 651 "parser.rl"
+#line 650 "parser.rl"
if (cs >= JSON_first_final && p == pe) {
return result;
diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl
index 7de7bb1..8eca179 100644
--- a/ext/json/ext/parser/parser.rl
+++ b/ext/json/ext/parser/parser.rl
@@ -19,8 +19,8 @@
#ifdef HAVE_RUBY_ENCODING_H
#include "ruby/encoding.h"
#define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding())
-static VALUE mEncoding_ASCII_8BIT, mEncoding_UTF_8, mEncoding_UTF_16BE,
- mEncoding_UTF_16LE, mEncoding_UTF_32BE, mEncoding_UTF_32LE;
+static VALUE CEncoding_ASCII_8BIT, CEncoding_UTF_8, CEncoding_UTF_16BE,
+ CEncoding_UTF_16LE, CEncoding_UTF_32BE, CEncoding_UTF_32LE;
static ID i_encoding, i_encode, i_encode_bang, i_force_encoding;
#else
#define FORCE_UTF8(obj)
@@ -361,7 +361,6 @@ static VALUE json_string_unescape(char *p, char *pe)
while (p < pe) {
if (*p == '\\') {
p++;
- if (p >= pe) return Qnil; /* raise an exception later, \ at end */
switch (*p) {
case '"':
case '\\':
@@ -392,7 +391,7 @@ static VALUE json_string_unescape(char *p, char *pe)
if (p > pe - 4) {
return Qnil;
} else {
- p = JSON_convert_UTF16_to_UTF8(result, p, pe, strictConversion);
+ p = JSON_convert_UTF16_to_UTF8(result, p, pe);
}
break;
default:
@@ -498,28 +497,28 @@ inline static VALUE convert_encoding(VALUE source)
#ifdef HAVE_RUBY_ENCODING_H
{
VALUE encoding = rb_funcall(source, i_encoding, 0);
- if (encoding == mEncoding_ASCII_8BIT) {
+ if (encoding == CEncoding_ASCII_8BIT) {
if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
source = rb_str_dup(source);
- rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_32BE);
- source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8);
+ rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_32BE);
+ source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
} else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
source = rb_str_dup(source);
- rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_16BE);
- source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8);
+ rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_16BE);
+ source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
} else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
source = rb_str_dup(source);
- rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_32LE);
- source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8);
+ rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_32LE);
+ source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
} else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
source = rb_str_dup(source);
- rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_16LE);
- source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8);
+ rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_16LE);
+ source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
} else {
- source = rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_8);
+ FORCE_UTF8(source);
}
} else {
- source = rb_funcall(source, i_encode, 1, mEncoding_UTF_8);
+ source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8);
}
}
#else
@@ -721,12 +720,12 @@ void Init_parser()
i_object_class = rb_intern("object_class");
i_array_class = rb_intern("array_class");
#ifdef HAVE_RUBY_ENCODING_H
- mEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
- mEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be"));
- mEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le"));
- mEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be"));
- mEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le"));
- mEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit"));
+ CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
+ CEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be"));
+ CEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le"));
+ CEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be"));
+ CEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le"));
+ CEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit"));
i_encoding = rb_intern("encoding");
i_encode = rb_intern("encode");
i_encode_bang = rb_intern("encode!");
diff --git a/ext/json/ext/parser/unicode.c b/ext/json/ext/parser/unicode.c
index 6bd29e2..711aac5 100644
--- a/ext/json/ext/parser/unicode.c
+++ b/ext/json/ext/parser/unicode.c
@@ -23,32 +23,6 @@
*/
/*
- * Index into the table below with the first byte of a UTF-8 sequence to
- * get the number of trailing bytes that are supposed to follow it.
- * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
- * left as-is for anyone who may want to do such conversion, which was
- * allowed in earlier algorithms.
- */
-static const char trailingBytesForUTF8[256] = {
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
-};
-
-/*
- * Magic values subtracted from a buffer value during UTF8 conversion.
- * This table contains as many values as there might be trailing bytes
- * in a UTF-8 sequence.
- */
-static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
- 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
-
-/*
* Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
* into the first byte, depending on how many bytes follow. There are
* as many entries in this table as there are UTF-8 sequence types.
@@ -57,34 +31,61 @@ static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080
*/
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+static const char digit_values[256] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
+ -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1
+};
+
char *JSON_convert_UTF16_to_UTF8 (
VALUE buffer,
char *source,
- char *sourceEnd,
- ConversionFlags flags)
+ char *sourceEnd)
{
UTF16 *tmp, *tmpPtr, *tmpEnd;
char buf[5];
- long n = 0, i;
- char *p = source - 1;
+ long n = 0;
+ char failed = 1, c, *p = source - 1;
while (p < sourceEnd && p[0] == '\\' && p[1] == 'u') {
p += 6;
n++;
}
p = source + 1;
- buf[4] = 0;
tmpPtr = tmp = ALLOC_N(UTF16, n);
tmpEnd = tmp + n;
- for (i = 0; i < n; i++) {
- buf[0] = *p++;
- buf[1] = *p++;
- buf[2] = *p++;
- buf[3] = *p++;
- tmpPtr[i] = (UTF16)strtol(buf, NULL, 16);
+ while (tmpPtr < tmpEnd) {
+ c = digit_values[(unsigned char) *p++];
+ failed *= c;
+ *tmpPtr = c << 12;
+ c = digit_values[(unsigned char) *p++];
+ failed *= c;
+ *tmpPtr |= c << 8;
+ c = digit_values[(unsigned char) *p++];
+ failed *= c;
+ *tmpPtr |= c << 4;
+ c = digit_values[(unsigned char) *p++];
+ failed *= c;
+ *tmpPtr++ |= c;
p += 2;
}
+ if (failed < 0) {
+ rb_raise(rb_path2class("JSON::ParserError"),
+ "illegal \\uXXXX unicode value near %s", source);
+ }
+ tmpPtr = tmp;
while (tmpPtr < tmpEnd) {
UTF32 ch;
unsigned short bytesToWrite = 0;
@@ -102,10 +103,6 @@ char *JSON_convert_UTF16_to_UTF8 (
ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+ (ch2 - UNI_SUR_LOW_START) + halfBase;
++tmpPtr;
- } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
- ruby_xfree(tmp);
- rb_raise(rb_path2class("JSON::ParserError"),
- "\\uXXXX is illegal/malformed utf-16 near %s", source);
}
} else { /* We don't have the 16 bits following the high surrogate. */
ruby_xfree(tmp);
@@ -113,13 +110,6 @@ char *JSON_convert_UTF16_to_UTF8 (
"partial character in source, but hit end near %s", source);
break;
}
- } else if (flags == strictConversion) {
- /* UTF-16 surrogate values are illegal in UTF-32 */
- if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
- ruby_xfree(tmp);
- rb_raise(rb_path2class("JSON::ParserError"),
- "\\uXXXX is illegal/malformed utf-16 near %s", source);
- }
}
/* Figure out how many bytes the result will require */
if (ch < (UTF32) 0x80) {
@@ -149,6 +139,6 @@ char *JSON_convert_UTF16_to_UTF8 (
rb_str_buf_cat(buffer, p, bytesToWrite);
}
ruby_xfree(tmp);
- source += 5 + (n - 1) * 6;
+ source += 6 * n - 1;
return source;
}
diff --git a/ext/json/ext/parser/unicode.h b/ext/json/ext/parser/unicode.h
index 155da0c..40de426 100644
--- a/ext/json/ext/parser/unicode.h
+++ b/ext/json/ext/parser/unicode.h
@@ -31,16 +31,10 @@ typedef enum {
sourceIllegal /* source sequence is illegal/malformed */
} ConversionResult;
-typedef enum {
- strictConversion = 0,
- lenientConversion
-} ConversionFlags;
-
char *JSON_convert_UTF16_to_UTF8 (
VALUE buffer,
char *source,
- char *sourceEnd,
- ConversionFlags flags);
+ char *sourceEnd);
#ifndef RARRAY_PTR
#define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr