some optimisations and cleanup

author: Florian Frank <flori@ping.de> 2009-10-20 13:04:24 +0200
committer: Florian Frank <flori@ping.de> 2009-10-26 22:56:27 +0100
commit: 41ae3d70d6dd141759eb6f3fddf460b327a90796 (patch)
tree: 54431a12877a97a50f8fb077a03b6e1cdecb9f12
parent: 3a13313e9d231e7e3f99101812825dbe3d01d13a (diff)
download: json-41ae3d70d6dd141759eb6f3fddf460b327a90796.tar.gz
9 files changed, 161 insertions, 173 deletions
diff --git a/ext/json/ext/generator/extconf.rb b/ext/json/ext/generator/extconf.rb
index 797b566..a0760e2 100644
--- a/ext/json/ext/generator/extconf.rb
+++ b/ext/json/ext/generator/extconf.rb
@@ -1,9 +1,12 @@
 require 'mkmf'
 require 'rbconfig'
 
+unless $CFLAGS.gsub!(/ -O[\dsz]?/, ' -O3')
+  $CFLAGS << ' -O3'
+end
 if CONFIG['CC'] =~ /gcc/
-  $CFLAGS += ' -Wall'
-  #$CFLAGS += ' -O0 -ggdb'
+  $CFLAGS << ' -Wall'
+  #$CFLAGS.gsub!(/ -O[\dsz]?/, ' -O0 -ggdb')
 end
 
 have_header("ruby/st.h") || have_header("st.h")
diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c
index 558f28d..a40cbb4 100644
--- a/ext/json/ext/generator/generator.c
+++ b/ext/json/ext/generator/generator.c
@@ -24,7 +24,7 @@
 #ifdef HAVE_RUBY_ENCODING_H
 #include "ruby/encoding.h"
 #define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding())
-static VALUE mEncoding_UTF_8;
+static VALUE CEncoding_UTF_8;
 static ID i_encoding, i_encode;
 #else
 #define FORCE_UTF8(obj)
@@ -154,7 +154,7 @@ static int hash_to_json_i(VALUE key, VALUE value, VALUE buf)
 /*
  * call-seq: to_json(state = nil, depth = 0)
  *
- * Returns a JSON string containing a JSON object, that is unparsed from
+ * Returns a JSON string containing a JSON object, that is generated from
  * this Hash instance.
  * _state_ is a JSON::State object, that can also be used to configure the
  * produced JSON string output further.
@@ -260,7 +260,7 @@ inline static VALUE mArray_json_transfrom(VALUE self, VALUE Vstate, VALUE Vdepth
 /*
  * call-seq: to_json(state = nil, depth = 0)
  *
- * Returns a JSON string containing a JSON array, that is unparsed from
+ * Returns a JSON string containing a JSON array, that is generated from
  * this Array instance.
  * _state_ is a JSON::State object, that can also be used to configure the
  * produced JSON string output further.
@@ -360,14 +360,14 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
     VALUE result = rb_str_buf_new(RSTRING_LEN(self));
     rb_str_buf_cat2(result, "\"");
 #ifdef HAVE_RUBY_ENCODING_H
-    if (rb_funcall(self, i_encoding, 0) == mEncoding_UTF_8) {
-        JSON_convert_UTF8_to_JSON(result, self, strictConversion);
+    if (rb_funcall(self, i_encoding, 0) == CEncoding_UTF_8) {
+        JSON_convert_UTF8_to_JSON(result, self);
     } else {
-        VALUE string = rb_funcall(self, i_encode, 1, mEncoding_UTF_8);
-        JSON_convert_UTF8_to_JSON(result, string, strictConversion);
+        VALUE string = rb_funcall(self, i_encode, 1, CEncoding_UTF_8);
+        JSON_convert_UTF8_to_JSON(result, string);
     }
 #else
-    JSON_convert_UTF8_to_JSON(result, self, strictConversion);
+    JSON_convert_UTF8_to_JSON(result, self);
 #endif
     rb_str_buf_cat2(result, "\"");
     FORCE_UTF8(result);
@@ -378,7 +378,7 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
  * call-seq: to_json_raw_object()
  *
  * This method creates a raw object hash, that can be nested into
- * other data structures and will be unparsed as a raw string. This
+ * other data structures and will be generated as a raw string. This
  * method should be used, if you want to convert raw strings to JSON
  * instead of UTF-8 strings, e. g. binary data.
  */
@@ -856,17 +856,19 @@ static VALUE cState_forget(VALUE self, VALUE object)
 void Init_generator()
 {
     rb_require("json/common");
+
     mJSON = rb_define_module("JSON");
     mExt = rb_define_module_under(mJSON, "Ext");
     mGenerator = rb_define_module_under(mExt, "Generator");
+
     eGeneratorError = rb_path2class("JSON::GeneratorError");
     eCircularDatastructure = rb_path2class("JSON::CircularDatastructure");
     eNestingError = rb_path2class("JSON::NestingError");
+
     cState = rb_define_class_under(mGenerator, "State", rb_cObject);
     rb_define_alloc_func(cState, cState_s_allocate);
     rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1);
     rb_define_method(cState, "initialize", cState_initialize, -1);
-
     rb_define_method(cState, "indent", cState_indent, 0);
     rb_define_method(cState, "indent=", cState_indent_set, 1);
     rb_define_method(cState, "space", cState_space, 0);
@@ -928,7 +930,7 @@ void Init_generator()
     i_create_id = rb_intern("create_id");
     i_extend = rb_intern("extend");
 #ifdef HAVE_RUBY_ENCODING_H
-    mEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
+    CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
     i_encoding = rb_intern("encoding");
     i_encode = rb_intern("encode");
 #endif
diff --git a/ext/json/ext/generator/unicode.c b/ext/json/ext/generator/unicode.c
index 3ddfbe0..1d0b675 100644
--- a/ext/json/ext/generator/unicode.c
+++ b/ext/json/ext/generator/unicode.c
@@ -1,9 +1,5 @@
 #include "unicode.h"
 
-#define unicode_escape(buffer, character)          \
-    snprintf(buf, 7, "\\u%04x", (unsigned int) (character)); \
-         rb_str_buf_cat(buffer, buf, 6);
-
 /*
  * Copyright 2001-2004 Unicode, Inc.
  * 
@@ -53,15 +49,6 @@ static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080
 		     0x03C82080UL, 0xFA082080UL, 0x82082080UL };
 
 /*
- * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
- * into the first byte, depending on how many bytes follow.  There are
- * as many entries in this table as there are UTF-8 sequence types.
- * (I.e., one byte sequence, two byte... etc.). Remember that sequencs
- * for *legal* UTF-8 will be 4 or fewer bytes total.
- */
-static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
-
-/*
  * Utility routine to tell whether a sequence of bytes is legal UTF-8.
  * This must be called with the length pre-determined by the first byte.
  * If not calling this from ConvertUTF8to*, then the length can be set by:
@@ -98,9 +85,21 @@ inline static unsigned char isLegalUTF8(const UTF8 *source, int length)
     return 1;
 }
 
-void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags)
+inline static void unicode_escape(VALUE buffer, UTF16 character)
+{   /* Append the six-byte JSON escape for character (backslash, 'u', four lowercase hex digits) to buffer. */
+    const char *digits = "0123456789abcdef";
+    char buf[7] = { '\\', 'u' }; /* fixed two-byte prefix; slots 2..5 are filled in below */
+
+    buf[6] = 0; /* terminator only: the rb_str_buf_cat call below copies just the first 6 bytes */
+    buf[2] = digits[character >> 12]; /* NOTE(review): top nibble is not masked; presumably relies on character fitting in 16 bits -- confirm */
+    buf[3] = digits[(character >> 8) & 0xf];
+    buf[4] = digits[(character >> 4) & 0xf];
+    buf[5] = digits[character & 0xf];
+    rb_str_buf_cat(buffer, buf, 6);
+}
+
+inline void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string)
 {
-    char buf[7];
     const UTF8* source = (UTF8 *) RSTRING_PTR(string);
     const UTF8* sourceEnd = source + RSTRING_LEN(string);
 
@@ -131,45 +130,54 @@ void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags
         if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
             /* UTF-16 surrogate values are illegal in UTF-32 */
             if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
-                if (flags == strictConversion) {
-                    source -= (extraBytesToRead+1); /* return to the illegal value itself */
-                    rb_raise(rb_path2class("JSON::GeneratorError"),
+#if UNI_STRICT_CONVERSION
+                source -= (extraBytesToRead+1); /* return to the illegal value itself */
+                rb_raise(rb_path2class("JSON::GeneratorError"),
                         "source sequence is illegal/malformed utf-8");
-                } else {
-                    unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
-                }
+#else
+                unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
+#endif
             } else {
                 /* normal case */
-                if (ch == '"') {
-                    rb_str_buf_cat2(buffer, "\\\"");
-                } else if (ch == '\\') {
-                    rb_str_buf_cat2(buffer, "\\\\");
-                } else if (ch >= 0x20 && ch <= 0x7f) {
-                    rb_str_buf_cat(buffer, (char *) source - 1, 1);
-                } else if (ch == '\n') {
-                    rb_str_buf_cat2(buffer, "\\n");
-                } else if (ch == '\r') {
-                    rb_str_buf_cat2(buffer, "\\r");
-                } else if (ch == '\t') {
-                    rb_str_buf_cat2(buffer, "\\t");
-                } else if (ch == '\f') {
-                    rb_str_buf_cat2(buffer, "\\f");
-                } else if (ch == '\b') {
-                    rb_str_buf_cat2(buffer, "\\b");
-                } else if (ch < 0x20) {
-                    unicode_escape(buffer, (UTF16) ch);
-                } else {
-                    unicode_escape(buffer, (UTF16) ch);
+                switch(ch) {
+                    case '\n':
+                        rb_str_buf_cat2(buffer, "\\n");
+                        break;
+                    case '\r':
+                        rb_str_buf_cat2(buffer, "\\r");
+                        break;
+                    case '\\':
+                        rb_str_buf_cat2(buffer, "\\\\");
+                        break;
+                    case '"':
+                        rb_str_buf_cat2(buffer, "\\\"");
+                        break;
+                    case '\t':
+                        rb_str_buf_cat2(buffer, "\\t");
+                        break;
+                    case '\f':
+                        rb_str_buf_cat2(buffer, "\\f");
+                        break;
+                    case '\b':
+                        rb_str_buf_cat2(buffer, "\\b");
+                        break;
+                    default:
+                        if (ch >= 0x20 && ch <= 0x7f) {
+                            rb_str_buf_cat(buffer, (char *) source - 1, 1);
+                        } else {
+                            unicode_escape(buffer, (UTF16) ch);
+                        }
+                        break;
                 }
             }
         } else if (ch > UNI_MAX_UTF16) {
-            if (flags == strictConversion) {
-                source -= (extraBytesToRead+1); /* return to the start */
-                rb_raise(rb_path2class("JSON::GeneratorError"),
-                        "source sequence is illegal/malformed utf8");
-            } else {
-                unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
-            }
+#if UNI_STRICT_CONVERSION
+            source -= (extraBytesToRead+1); /* return to the start */
+            rb_raise(rb_path2class("JSON::GeneratorError"),
+                    "source sequence is illegal/malformed utf8");
+#else
+            unicode_escape(buffer, UNI_REPLACEMENT_CHAR);
+#endif
         } else {
             /* target is a character in range 0xFFFF - 0x10FFFF. */
             ch -= halfBase;
diff --git a/ext/json/ext/generator/unicode.h b/ext/json/ext/generator/unicode.h
index 841474b..77b29a4 100644
--- a/ext/json/ext/generator/unicode.h
+++ b/ext/json/ext/generator/unicode.h
@@ -3,17 +3,7 @@
 #ifndef _GENERATOR_UNICODE_H_
 #define _GENERATOR_UNICODE_H_
 
-typedef enum {
-	conversionOK = 0, 	/* conversion successful */
-	sourceExhausted,	/* partial character in source, but hit end */
-	targetExhausted,	/* insuff. room in target for conversion */
-	sourceIllegal		/* source sequence is illegal/malformed */
-} ConversionResult;
-
-typedef enum {
-	strictConversion = 0,
-	lenientConversion
-} ConversionFlags;
+#define UNI_STRICT_CONVERSION 1
 
 typedef unsigned long	UTF32;	/* at least 32 bits */
 typedef unsigned short	UTF16;	/* at least 16 bits */
@@ -35,7 +25,7 @@ static const int halfShift  = 10; /* used for shifting by 10 bits */
 static const UTF32 halfBase = 0x0010000UL;
 static const UTF32 halfMask = 0x3FFUL;
 
-void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string, ConversionFlags flags);
+void JSON_convert_UTF8_to_JSON(VALUE buffer, VALUE string); /* not declared inline: generator.c calls this across translation units, so unicode.c must emit an external definition (C99 inline rules) */
 
 #ifndef RARRAY_PTR
 #define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr
diff --git a/ext/json/ext/parser/extconf.rb b/ext/json/ext/parser/extconf.rb
index 6226394..9662e9a 100644
--- a/ext/json/ext/parser/extconf.rb
+++ b/ext/json/ext/parser/extconf.rb
@@ -1,9 +1,12 @@
 require 'mkmf'
 require 'rbconfig'
 
+unless $CFLAGS.gsub!(/ -O[\dsz]?/, ' -O3')
+  $CFLAGS << ' -O3'
+end
 if CONFIG['CC'] =~ /gcc/
-  $CFLAGS += ' -Wall'
-  #$CFLAGS += ' -O0 -ggdb'
+  $CFLAGS << ' -Wall'
+  #$CFLAGS.gsub!(/ -O[\dsz]?/, ' -O0 -ggdb')
 end
 
 have_header("ruby/st.h") || have_header("st.h")
diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c
index 1781381..d2dfe52 100644
--- a/ext/json/ext/parser/parser.c
+++ b/ext/json/ext/parser/parser.c
@@ -1262,7 +1262,6 @@ static VALUE json_string_unescape(char *p, char *pe)
     while (p < pe) {
         if (*p == '\\') {
             p++;
-            if (p >= pe) return Qnil; /* raise an exception later, \ at end */
             switch (*p) {
                 case '"':
                 case '\\':
@@ -1293,7 +1292,7 @@ static VALUE json_string_unescape(char *p, char *pe)
                     if (p > pe - 4) { 
                         return Qnil;
                     } else {
-                        p = JSON_convert_UTF16_to_UTF8(result, p, pe, strictConversion);
+                        p = JSON_convert_UTF16_to_UTF8(result, p, pe);
                     }
                     break;
                 default:
@@ -1312,7 +1311,7 @@ static VALUE json_string_unescape(char *p, char *pe)
 }
 
 
-#line 1316 "parser.c"
+#line 1315 "parser.c"
 static const int JSON_string_start = 1;
 static const int JSON_string_first_final = 8;
 static const int JSON_string_error = 0;
@@ -1320,7 +1319,7 @@ static const int JSON_string_error = 0;
 static const int JSON_string_en_main = 1;
 
 
-#line 433 "parser.rl"
+#line 432 "parser.rl"
 
 
 static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
@@ -1329,15 +1328,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
 
     *result = rb_str_new("", 0);
     
-#line 1333 "parser.c"
+#line 1332 "parser.c"
 	{
 	cs = JSON_string_start;
 	}
 
-#line 441 "parser.rl"
+#line 440 "parser.rl"
     json->memo = p;
     
-#line 1341 "parser.c"
+#line 1340 "parser.c"
 	{
 	if ( p == pe )
 		goto _test_eof;
@@ -1362,7 +1361,7 @@ case 2:
 		goto st0;
 	goto st2;
 tr2:
-#line 419 "parser.rl"
+#line 418 "parser.rl"
 	{
         *result = json_string_unescape(json->memo + 1, p);
         if (NIL_P(*result)) {
@@ -1373,14 +1372,14 @@ tr2:
 			{p = (( p + 1))-1;}
 		}
 	}
-#line 430 "parser.rl"
+#line 429 "parser.rl"
 	{ p--; {p++; cs = 8; goto _out;} }
 	goto st8;
 st8:
 	if ( ++p == pe )
 		goto _test_eof8;
 case 8:
-#line 1384 "parser.c"
+#line 1383 "parser.c"
 	goto st0;
 st3:
 	if ( ++p == pe )
@@ -1456,7 +1455,7 @@ case 7:
 	_out: {}
 	}
 
-#line 443 "parser.rl"
+#line 442 "parser.rl"
 
     if (cs >= JSON_string_first_final) {
         return p + 1;
@@ -1467,7 +1466,7 @@ case 7:
 
 
 
-#line 1471 "parser.c"
+#line 1470 "parser.c"
 static const int JSON_start = 1;
 static const int JSON_first_final = 10;
 static const int JSON_error = 0;
@@ -1475,7 +1474,7 @@ static const int JSON_error = 0;
 static const int JSON_en_main = 1;
 
 
-#line 477 "parser.rl"
+#line 476 "parser.rl"
 
 
 /* 
@@ -1518,7 +1517,7 @@ inline static VALUE convert_encoding(VALUE source)
                 rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_16LE);
                 source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8);
             } else {
-                source = rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_8);
+                FORCE_UTF8(source);
             }
         } else {
             source = rb_funcall(source, i_encode, 1, mEncoding_UTF_8);
@@ -1647,16 +1646,16 @@ static VALUE cParser_parse(VALUE self)
     GET_STRUCT;
 
     
-#line 1651 "parser.c"
+#line 1650 "parser.c"
 	{
 	cs = JSON_start;
 	}
 
-#line 648 "parser.rl"
+#line 647 "parser.rl"
     p = json->source;
     pe = p + json->len;
     
-#line 1660 "parser.c"
+#line 1659 "parser.c"
 	{
 	if ( p == pe )
 		goto _test_eof;
@@ -1712,7 +1711,7 @@ case 5:
 		goto st1;
 	goto st5;
 tr3:
-#line 466 "parser.rl"
+#line 465 "parser.rl"
 	{
         char *np;
         json->current_nesting = 1;
@@ -1721,7 +1720,7 @@ tr3:
     }
 	goto st10;
 tr4:
-#line 459 "parser.rl"
+#line 458 "parser.rl"
 	{
         char *np;
         json->current_nesting = 1;
@@ -1733,7 +1732,7 @@ st10:
 	if ( ++p == pe )
 		goto _test_eof10;
 case 10:
-#line 1737 "parser.c"
+#line 1736 "parser.c"
 	switch( (*p) ) {
 		case 13: goto st10;
 		case 32: goto st10;
@@ -1790,7 +1789,7 @@ case 9:
 	_out: {}
 	}
 
-#line 651 "parser.rl"
+#line 650 "parser.rl"
 
     if (cs >= JSON_first_final && p == pe) {
         return result;
diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl
index 7de7bb1..8eca179 100644
--- a/ext/json/ext/parser/parser.rl
+++ b/ext/json/ext/parser/parser.rl
@@ -19,8 +19,8 @@
 #ifdef HAVE_RUBY_ENCODING_H
 #include "ruby/encoding.h"
 #define FORCE_UTF8(obj) rb_enc_associate((obj), rb_utf8_encoding())
-static VALUE mEncoding_ASCII_8BIT, mEncoding_UTF_8, mEncoding_UTF_16BE,
-    mEncoding_UTF_16LE, mEncoding_UTF_32BE, mEncoding_UTF_32LE;
+static VALUE CEncoding_ASCII_8BIT, CEncoding_UTF_8, CEncoding_UTF_16BE,
+    CEncoding_UTF_16LE, CEncoding_UTF_32BE, CEncoding_UTF_32LE;
 static ID i_encoding, i_encode, i_encode_bang, i_force_encoding;
 #else
 #define FORCE_UTF8(obj)
@@ -361,7 +361,6 @@ static VALUE json_string_unescape(char *p, char *pe)
     while (p < pe) {
         if (*p == '\\') {
             p++;
-            if (p >= pe) return Qnil; /* raise an exception later, \ at end */
             switch (*p) {
                 case '"':
                 case '\\':
@@ -392,7 +391,7 @@ static VALUE json_string_unescape(char *p, char *pe)
                     if (p > pe - 4) { 
                         return Qnil;
                     } else {
-                        p = JSON_convert_UTF16_to_UTF8(result, p, pe, strictConversion);
+                        p = JSON_convert_UTF16_to_UTF8(result, p, pe);
                     }
                     break;
                 default:
@@ -498,28 +497,28 @@ inline static VALUE convert_encoding(VALUE source)
 #ifdef HAVE_RUBY_ENCODING_H
     {
         VALUE encoding = rb_funcall(source, i_encoding, 0);
-        if (encoding == mEncoding_ASCII_8BIT) {
+        if (encoding == CEncoding_ASCII_8BIT) {
             if (len >= 4 &&  ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
                 source = rb_str_dup(source);
-                rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_32BE);
-                source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8);
+                rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_32BE);
+                source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
             } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
                 source = rb_str_dup(source);
-                rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_16BE);
-                source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8);
+                rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_16BE);
+                source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
             } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
                 source = rb_str_dup(source);
-                rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_32LE);
-                source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8);
+                rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_32LE);
+                source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
             } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
                 source = rb_str_dup(source);
-                rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_16LE);
-                source = rb_funcall(source, i_encode_bang, 1, mEncoding_UTF_8);
+                rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_16LE);
+                source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
             } else {
-                source = rb_funcall(source, i_force_encoding, 1, mEncoding_UTF_8);
+                FORCE_UTF8(source);
             }
         } else {
-            source = rb_funcall(source, i_encode, 1, mEncoding_UTF_8);
+            source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8);
         }
     }
 #else
@@ -721,12 +720,12 @@ void Init_parser()
     i_object_class = rb_intern("object_class");
     i_array_class = rb_intern("array_class");
 #ifdef HAVE_RUBY_ENCODING_H
-    mEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
-    mEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be"));
-    mEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le"));
-    mEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be"));
-    mEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le"));
-    mEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit"));
+    CEncoding_UTF_8 = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-8"));
+    CEncoding_UTF_16BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16be"));
+    CEncoding_UTF_16LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-16le"));
+    CEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be"));
+    CEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le"));
+    CEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit"));
     i_encoding = rb_intern("encoding");
     i_encode = rb_intern("encode");
     i_encode_bang = rb_intern("encode!");
diff --git a/ext/json/ext/parser/unicode.c b/ext/json/ext/parser/unicode.c
index 6bd29e2..711aac5 100644
--- a/ext/json/ext/parser/unicode.c
+++ b/ext/json/ext/parser/unicode.c
@@ -23,32 +23,6 @@
  */
 
 /*
- * Index into the table below with the first byte of a UTF-8 sequence to
- * get the number of trailing bytes that are supposed to follow it.
- * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
- * left as-is for anyone who may want to do such conversion, which was
- * allowed in earlier algorithms.
- */
-static const char trailingBytesForUTF8[256] = {
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
-};
-
-/*
- * Magic values subtracted from a buffer value during UTF8 conversion.
- * This table contains as many values as there might be trailing bytes
- * in a UTF-8 sequence.
- */
-static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 
-             0x03C82080UL, 0xFA082080UL, 0x82082080UL };
-
-/*
  * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
  * into the first byte, depending on how many bytes follow.  There are
  * as many entries in this table as there are UTF-8 sequence types.
@@ -57,34 +31,61 @@ static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080
  */
 static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
 
+static const char digit_values[256] = { /* indexed by byte value: 0-15 for '0'-'9', 'A'-'F', 'a'-'f'; -1 for every other byte */
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
+    -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1
+}; /* NOTE(review): element type is plain char, whose signedness is implementation-defined; where it is unsigned the -1 entries are stored as 255 */
+
 char *JSON_convert_UTF16_to_UTF8 (
         VALUE buffer,
         char *source,
-        char *sourceEnd,
-        ConversionFlags flags)
+        char *sourceEnd)
 {
     UTF16 *tmp, *tmpPtr, *tmpEnd;
     char buf[5];
-    long n = 0, i;
-    char *p = source - 1;
+    long n = 0;
+    char failed = 1, c, *p = source - 1;
 
     while (p < sourceEnd && p[0] == '\\' && p[1] == 'u') {
         p += 6;
         n++;
     }
     p = source + 1;
-    buf[4] = 0;
     tmpPtr = tmp = ALLOC_N(UTF16, n);
     tmpEnd = tmp + n;
-    for (i = 0; i < n; i++) {
-        buf[0] = *p++;
-        buf[1] = *p++;
-        buf[2] = *p++;
-        buf[3] = *p++;
-        tmpPtr[i] = (UTF16)strtol(buf, NULL, 16);
+    for (failed = 0; tmpPtr < tmpEnd; tmpPtr++) { /* decode each escape's four hex digits into one UTF-16 unit of tmp */
+        int shift;
+        *tmpPtr = 0;
+        for (shift = 12; shift >= 0; shift -= 4) {
+            c = digit_values[(unsigned char) *p++];
+            /* compare as unsigned char so the -1 sentinel is caught whether plain char is signed or not */
+            if ((unsigned char) c > 15) {
+                failed = 1; /* sticky flag: a later valid digit can no longer hide this error */
+            } else {
+                *tmpPtr |= (UTF16) (c << shift);
+            }
+        }
+        p += 2; /* step over the backslash and 'u' that introduce the next escape */
+    }
+    if (failed) {
+        ruby_xfree(tmp); /* rb_raise does not return, so release the scratch buffer first */
+        rb_raise(rb_path2class("JSON::ParserError"),
+                "illegal \\uXXXX unicode value near %s", source);
+    }
 
+    tmpPtr = tmp;
     while (tmpPtr < tmpEnd) {
         UTF32 ch;
         unsigned short bytesToWrite = 0;
@@ -102,10 +103,6 @@ char *JSON_convert_UTF16_to_UTF8 (
                     ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
                         + (ch2 - UNI_SUR_LOW_START) + halfBase;
                     ++tmpPtr;
-                } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
-                    ruby_xfree(tmp);
-                    rb_raise(rb_path2class("JSON::ParserError"),
-                            "\\uXXXX is illegal/malformed utf-16 near %s", source);
                 }
             } else { /* We don't have the 16 bits following the high surrogate. */
                 ruby_xfree(tmp);
@@ -113,13 +110,6 @@ char *JSON_convert_UTF16_to_UTF8 (
                     "partial character in source, but hit end near %s", source);
                 break;
             }
-        } else if (flags == strictConversion) {
-            /* UTF-16 surrogate values are illegal in UTF-32 */
-            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
-                ruby_xfree(tmp);
-                rb_raise(rb_path2class("JSON::ParserError"),
-                    "\\uXXXX is illegal/malformed utf-16 near %s", source);
-            }
         }
         /* Figure out how many bytes the result will require */
         if (ch < (UTF32) 0x80) {
@@ -149,6 +139,6 @@ char *JSON_convert_UTF16_to_UTF8 (
         rb_str_buf_cat(buffer, p, bytesToWrite);
     }
     ruby_xfree(tmp);
-    source += 5 + (n - 1) * 6;
+    source += 6 * n - 1;
     return source;
 }
diff --git a/ext/json/ext/parser/unicode.h b/ext/json/ext/parser/unicode.h
index 155da0c..40de426 100644
--- a/ext/json/ext/parser/unicode.h
+++ b/ext/json/ext/parser/unicode.h
@@ -31,16 +31,10 @@ typedef enum {
 	sourceIllegal		/* source sequence is illegal/malformed */
 } ConversionResult;
 
-typedef enum {
-	strictConversion = 0,
-	lenientConversion
-} ConversionFlags;
-
 char *JSON_convert_UTF16_to_UTF8 (
     VALUE buffer,
     char *source,
-    char *sourceEnd,
-		ConversionFlags flags);
+    char *sourceEnd);
 
 #ifndef RARRAY_PTR
 #define RARRAY_PTR(ARRAY) RARRAY(ARRAY)->ptr
author	Florian Frank <flori@ping.de>	2009-10-20 13:04:24 +0200
committer	Florian Frank <flori@ping.de>	2009-10-26 22:56:27 +0100
commit	41ae3d70d6dd141759eb6f3fddf460b327a90796 (patch)
tree	54431a12877a97a50f8fb077a03b6e1cdecb9f12
parent	3a13313e9d231e7e3f99101812825dbe3d01d13a (diff)
download	json-41ae3d70d6dd141759eb6f3fddf460b327a90796.tar.gz