diff options
author | nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-09-08 15:59:16 +0000 |
---|---|---|
committer | nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-09-08 15:59:16 +0000 |
commit | 6bf2fb4aabe8133028d7bc189e67cd26604f9987 (patch) | |
tree | 97584fc128ac569ceaf97af3b8d3092fc0f49c32 | |
parent | e1821933a34a4fb5019f10017f10bd98db983d90 (diff) | |
download | ruby-6bf2fb4aabe8133028d7bc189e67cd26604f9987.tar.gz |
* merged from trunk r19235:19258.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/mvm@19259 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | .merged-trunk-revision | 2 | ||||
-rw-r--r-- | ChangeLog | 93 | ||||
-rw-r--r-- | dir.c | 30 | ||||
-rw-r--r-- | enc/trans/escape.trans | 2 | ||||
-rw-r--r-- | enc/trans/iso2022.trans | 8 | ||||
-rw-r--r-- | enc/trans/japanese.trans | 4 | ||||
-rw-r--r-- | enc/trans/newline.trans | 6 | ||||
-rw-r--r-- | enc/trans/utf_16_32.trans | 16 | ||||
-rw-r--r-- | include/ruby/encoding.h | 5 | ||||
-rw-r--r-- | io.c | 2 | ||||
-rw-r--r-- | lib/cgi.rb | 62 | ||||
-rw-r--r-- | process.c | 4 | ||||
-rw-r--r-- | re.c | 8 | ||||
-rwxr-xr-x | test/cgi/test_cgi_tag_helper.rb | 2 | ||||
-rw-r--r-- | test/ruby/test_econv.rb | 35 | ||||
-rw-r--r-- | test/ruby/test_io.rb | 4 | ||||
-rw-r--r-- | test/ruby/test_transcode.rb | 2 | ||||
-rw-r--r-- | tool/transcode-tblgen.rb | 25 | ||||
-rw-r--r-- | transcode.c | 117 | ||||
-rw-r--r-- | transcode_data.h | 12 | ||||
-rw-r--r-- | vm.c | 1 | ||||
-rw-r--r-- | win32/Makefile.sub | 3 | ||||
-rw-r--r-- | win32/win32.c | 31 |
23 files changed, 296 insertions, 178 deletions
diff --git a/.merged-trunk-revision b/.merged-trunk-revision index 985c22d757..94a0018130 100644 --- a/.merged-trunk-revision +++ b/.merged-trunk-revision @@ -1 +1 @@ -19235 +19258 @@ -19,14 +19,107 @@ Tue Sep 9 00:55:34 2008 Nobuyoshi Nakada <nobu@ruby-lang.org> * st.c (st_init_table_with_size, ADD_DIRECT): fixed typo. +Tue Sep 9 00:20:10 2008 Tanaka Akira <akr@fsij.org> + + * transcode.c (rb_econv_asciicompat_encoding): check decoder. + +Tue Sep 9 00:00:47 2008 Tanaka Akira <akr@fsij.org> + + * transcode.c (rb_econv_t): last_error.partial_input removed. + +Mon Sep 8 23:24:54 2008 Tanaka Akira <akr@fsij.org> + + * include/ruby/encoding.h (rb_econv_asciicompat_encoding): renamed + from rb_econv_stateless_encoding to apply stateless ASCII + incompatible encodings such as UTF-16BE. + + * io.c (make_writeconv): use rb_econv_asciicompat_encoding. + + * transcode_data.h (rb_transcoder_asciicompat_type_t): renamed from + rb_transcoder_stateful_type_t. + (rb_transcoder): use rb_transcoder_asciicompat_type_t. + + * transcode.c: follow the type change. + (asciicompat_encoding_i): renamed from stateless_encoding_i. + (rb_econv_asciicompat_encoding): renamed from + rb_econv_stateless_encoding. + (econv_s_asciicompat_encoding): method renamed. + + * tool/transcode-tblgen.rb: follow the type change. + + * enc/trans/utf_16_32.trans: follow the type change. + rb_from_UTF_16BE to UTF-8 is asciicompat_decoder. + rb_from_UTF_16LE to UTF-8 is asciicompat_decoder. + rb_from_UTF_32BE to UTF-8 is asciicompat_decoder. + rb_from_UTF_32LE to UTF-8 is asciicompat_decoder. + UTF-8 to rb_to_UTF_16BE is asciicompat_encoder. + UTF-8 to rb_to_UTF_16LE is asciicompat_encoder. + UTF-8 to rb_to_UTF_32BE is asciicompat_encoder. + UTF-8 to rb_to_UTF_32LE is asciicompat_encoder. + + * enc/trans/newline.trans: follow the type change. universal newline + decoder is asciicompat_converter. + + * enc/trans/escape.trans: follow the type change. + + * enc/trans/iso2022.trans: ditto. + + * enc/trans/japanese.trans: ditto. + +Mon Sep 8 23:05:42 2008 Tanaka Akira <akr@fsij.org> + + * transcode.c (rb_econv_insert_output): "readagain" part should be + after replacement. + +Mon Sep 8 22:30:03 2008 NARUSE, Yui <naruse@ruby-lang.org> + + * dir.c (dir_initialize): rename option name of Dir.open + from :external_encoding to :encoding. + +Mon Sep 8 22:16:20 2008 Takeyuki FUJIOKA <xibbar@ruby-lang.org> + + * lib/cgi.rb : obsolete regex "n" option. [ruby-dev:36130] + Mon Sep 8 20:10:12 2008 Nobuyoshi Nakada <nobu@ruby-lang.org> * bignum.c (power_cache_get_power0): no need to register address. +Mon Sep 8 18:13:20 2008 NAKAMURA Usaku <usa@ruby-lang.org> + + * win32/win32.c (getppid): typo. [ruby-dev:36202] + + * process.c (get_ppid): mention the return value on Windows. + +Mon Sep 8 18:15:59 2008 Tanaka Akira <akr@fsij.org> + + * tool/transcode-tblgen.rb (ArrayCode): less string substitutions. + +Mon Sep 8 18:13:13 2008 Tanaka Akira <akr@fsij.org> + + * vm.c (rb_mRubyVMFrozenCore): registered for GC. + + * re.c (rb_reg_preprocess_dregexp): fix GC problem on MacOS X with + powerpc-apple-darwin8-gcc-4.0.1 (GCC) 4.0.1 (Apple Computer, Inc. + build 5367). + +Mon Sep 8 18:09:07 2008 NAKAMURA Usaku <usa@ruby-lang.org> + + * win32/Makefile.sub (config.h): define SIZE_MAX for VC++6/7. + +Mon Sep 8 17:46:09 2008 NAKAMURA Usaku <usa@ruby-lang.org> + + * win32/win32.c (filetime_to_unixtime): remove unused variable. + [ruby-dev:36191] + Mon Sep 8 17:26:51 2008 Nobuyoshi Nakada <nobu@ruby-lang.org> * st.c (garbage_collect): checks if memory can be reclaimed. +Mon Sep 8 13:47:39 2008 Kazuhiro NISHIYAMA <zn@mbf.nifty.com> + + * test/ruby/test_io.rb (TestIO#test_dup): add open in block. + see [ruby-dev:35957]. + Mon Sep 8 07:09:42 2008 Tadayoshi Funaba <tadf@dotrb.org> * complex.c: some adjustments. @@ -295,7 +295,7 @@ fnmatch( struct dir_data { DIR *dir; VALUE path; - rb_encoding *extenc; + rb_encoding *enc; }; static void @@ -323,7 +323,7 @@ dir_s_alloc(VALUE klass) dirp->dir = NULL; dirp->path = Qnil; - dirp->extenc = NULL; + dirp->enc = NULL; return obj; } @@ -338,26 +338,26 @@ static VALUE dir_initialize(int argc, VALUE *argv, VALUE dir) { struct dir_data *dp; - rb_encoding *extencoding; + rb_encoding *fsenc; VALUE dirname, opt; - static VALUE sym_extenc; + static VALUE sym_enc; - if (!sym_extenc) { - sym_extenc = ID2SYM(rb_intern("external_encoding")); + if (!sym_enc) { + sym_enc = ID2SYM(rb_intern("encoding")); } - extencoding = rb_filesystem_encoding(); + fsenc = rb_filesystem_encoding(); rb_scan_args(argc, argv, "11", &dirname, &opt); if (!NIL_P(opt)) { - VALUE v, extenc=Qnil; + VALUE v, enc=Qnil; opt = rb_convert_type(opt, T_HASH, "Hash", "to_hash"); - v = rb_hash_aref(opt, sym_extenc); - if (!NIL_P(v)) extenc = v; + v = rb_hash_aref(opt, sym_enc); + if (!NIL_P(v)) enc = v; - if (!NIL_P(extenc)) { - extencoding = rb_to_encoding(extenc); + if (!NIL_P(enc)) { + fsenc = rb_to_encoding(enc); } } @@ -367,7 +367,7 @@ dir_initialize(int argc, VALUE *argv, VALUE dir) if (dp->dir) closedir(dp->dir); dp->dir = NULL; dp->path = Qnil; - dp->extenc = extencoding; + dp->enc = fsenc; dp->dir = opendir(RSTRING_PTR(dirname)); if (dp->dir == NULL) { if (errno == EMFILE || errno == ENFILE) { @@ -499,7 +499,7 @@ dir_read(VALUE dir) errno = 0; dp = readdir(dirp->dir); if (dp) { - return dir_enc_str_new(dp->d_name, NAMLEN(dp), dirp->extenc); + return dir_enc_str_new(dp->d_name, NAMLEN(dp), dirp->enc); } else if (errno == 0) { /* end of stream */ return Qnil; @@ -537,7 +537,7 @@ dir_each(VALUE dir) GetDIR(dir, dirp); rewinddir(dirp->dir); for (dp = readdir(dirp->dir); dp != NULL; dp = readdir(dirp->dir)) { - rb_yield(dir_enc_str_new(dp->d_name, NAMLEN(dp), dirp->extenc)); + rb_yield(dir_enc_str_new(dp->d_name, NAMLEN(dp), dirp->enc)); if (dirp->dir == NULL) dir_closed(); } return dir; diff --git a/enc/trans/escape.trans b/enc/trans/escape.trans index cc1513724b..63836c37a6 100644 --- a/enc/trans/escape.trans +++ b/enc/trans/escape.trans @@ -79,7 +79,7 @@ rb_escape_xml_attr_quote = { 1, /* input_unit_length */ 1, /* max_input */ 7, /* max_output */ - stateful_encoder, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 1, escape_xml_attr_quote_init, escape_xml_attr_quote_init, NULL, NULL, NULL, fun_so_escape_xml_attr_quote, escape_xml_attr_quote_finish diff --git a/enc/trans/iso2022.trans b/enc/trans/iso2022.trans index b3dd7f0742..f8a9c2c2e1 100644 --- a/enc/trans/iso2022.trans +++ b/enc/trans/iso2022.trans @@ -114,7 +114,7 @@ rb_iso2022jp_decoder = { 1, /* input_unit_length */ 3, /* max_input */ 3, /* max_output */ - stateful_decoder, /* stateful_type */ + asciicompat_decoder, /* asciicompat_type */ 1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */ NULL, fun_si_iso2022jp_decoder, NULL, fun_so_iso2022jp_decoder }; @@ -196,7 +196,7 @@ rb_iso2022jp_encoder = { 1, /* input_unit_length */ 3, /* max_input */ 5, /* max_output */ - stateful_encoder, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_iso2022jp_encoder, finish_iso2022jp_encoder, @@ -218,7 +218,7 @@ rb_stateless_iso2022jp_to_eucjp = { 1, /* input_unit_length */ 3, /* max_input */ 2, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_stateless_iso2022jp_to_eucjp, }; @@ -239,7 +239,7 @@ rb_eucjp_to_stateless_iso2022jp = { 1, /* input_unit_length */ 3, /* max_input */ 3, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_eucjp_to_stateless_iso2022jp, }; diff --git a/enc/trans/japanese.trans b/enc/trans/japanese.trans index 64f38fbfc6..12321f587f 100644 --- a/enc/trans/japanese.trans +++ b/enc/trans/japanese.trans @@ -73,7 +73,7 @@ rb_eucjp2sjis = { 1, /* input_unit_length */ 3, /* max_input */ 2, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_eucjp2sjis }; @@ -85,7 +85,7 @@ rb_sjis2eucjp = { 1, /* input_unit_length */ 2, /* max_input */ 2, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_sjis2eucjp }; diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans index 9e5229f9a6..db49a9f003 100644 --- a/enc/trans/newline.trans +++ b/enc/trans/newline.trans @@ -92,7 +92,7 @@ rb_universal_newline = { 1, /* input_unit_length */ 1, /* max_input */ 1, /* max_output */ - stateful_decoder, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_universal_newline, universal_newline_finish @@ -105,7 +105,7 @@ rb_crlf_newline = { 1, /* input_unit_length */ 1, /* max_input */ 2, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, NULL }; @@ -117,7 +117,7 @@ rb_cr_newline = { 1, /* input_unit_length */ 1, /* max_input */ 1, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, NULL }; diff --git a/enc/trans/utf_16_32.trans b/enc/trans/utf_16_32.trans index 9ffff341a9..88648266c6 100644 --- a/enc/trans/utf_16_32.trans +++ b/enc/trans/utf_16_32.trans @@ -266,7 +266,7 @@ rb_from_UTF_16BE = { 2, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_decoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_16be }; @@ -278,7 +278,7 @@ rb_to_UTF_16BE = { 1, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_16be }; @@ -290,7 +290,7 @@ rb_from_UTF_16LE = { 2, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_decoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_16le }; @@ -302,7 +302,7 @@ rb_to_UTF_16LE = { 1, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_16le }; @@ -314,7 +314,7 @@ rb_from_UTF_32BE = { 4, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_decoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_32be }; @@ -326,7 +326,7 @@ rb_to_UTF_32BE = { 1, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_32be }; @@ -338,7 +338,7 @@ rb_from_UTF_32LE = { 4, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_decoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_32le }; @@ -350,7 +350,7 @@ rb_to_UTF_32LE = { 1, /* input_unit_length */ 4, /* max_input */ 4, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_encoder, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_32le }; diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index bcbe8152d4..1e48877291 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -238,8 +238,9 @@ void rb_econv_check_error(rb_econv_t *ec); int rb_econv_putbackable(rb_econv_t *ec); void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n); -/* returns corresponding stateless encoding, or NULL if not stateful. */ -const char *rb_econv_stateless_encoding(const char *stateful_enc); +/* returns the corresponding ASCII compatible encoding for encname, + * or NULL if encname is not ASCII incompatible encoding. */ +const char *rb_econv_asciicompat_encoding(const char *encname); VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags); VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags); @@ -715,7 +715,7 @@ make_writeconv(rb_io_t *fptr) } else { enc = fptr->encs.enc2 ? fptr->encs.enc2 : fptr->encs.enc; - senc = rb_econv_stateless_encoding(enc->name); + senc = rb_econv_asciicompat_encoding(enc->name); if (!senc && !(fptr->encs.ecflags & ECONV_STATEFUL_ENCODER_MASK)) { /* single conversion */ fptr->writeconv_pre_ecflags = ecflags; diff --git a/lib/cgi.rb b/lib/cgi.rb index 2de068ce1e..4ae060d5ce 100644 --- a/lib/cgi.rb +++ b/lib/cgi.rb @@ -286,7 +286,7 @@ class CGI REVISION = '$Id$' #:nodoc: - NEEDS_BINMODE = true if /WIN/ni.match(RUBY_PLATFORM) + NEEDS_BINMODE = true if /WIN/i.match(RUBY_PLATFORM) # Path separators in different environments. PATH_SEPARATOR = {'UNIX'=>'/', 'WINDOWS'=>'\\', 'MACINTOSH'=>':'} @@ -441,7 +441,7 @@ class CGI def CGI::escapeElement(string, *elements) elements = elements[0] if elements[0].kind_of?(Array) unless elements.empty? - string.gsub(/<\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?>/ni) do + string.gsub(/<\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?>/i) do CGI::escapeHTML($&) end else @@ -462,7 +462,7 @@ class CGI def CGI::unescapeElement(string, *elements) elements = elements[0] if elements[0].kind_of?(Array) unless elements.empty? - string.gsub(/<\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?>/ni) do + string.gsub(/<\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?>/i) do CGI::unescapeHTML($&) end else @@ -586,7 +586,7 @@ class CGI options.delete("nph") if defined?(MOD_RUBY) if options.delete("nph") or - (/IIS\/(\d+)/n.match(env_table['SERVER_SOFTWARE']) and $1.to_i < 5) + (/IIS\/(\d+)/.match(env_table['SERVER_SOFTWARE']) and $1.to_i < 5) buf += (env_table["SERVER_PROTOCOL"] or "HTTP/1.0") + " " + (HTTP_STATUS[options["status"]] or options["status"] or "200 OK") + EOL + @@ -657,19 +657,19 @@ class CGI if defined?(MOD_RUBY) table = Apache::request.headers_out - buf.scan(/([^:]+): (.+)#{EOL}/n){ |name, value| + buf.scan(/([^:]+): (.+)#{EOL}/){ |name, value| warn sprintf("name:%s value:%s\n", name, value) if $DEBUG case name when 'Set-Cookie' table.add(name, value) - when /^status$/ni + when /^status$/i Apache::request.status_line = value Apache::request.status = value.to_i - when /^content-type$/ni + when /^content-type$/i Apache::request.content_type = value - when /^content-encoding$/ni + when /^content-encoding$/i Apache::request.content_encoding = value - when /^location$/ni + when /^location$/i if Apache::request.status == 200 Apache::request.status = 302 end @@ -911,7 +911,7 @@ class CGI def CGI::parse(query) params = Hash.new([].freeze) - query.split(/[&;]/n).each do |pairs| + query.split(/[&;]/).each do |pairs| key, value = pairs.split('=',2).collect{|v| CGI::unescape(v) } if params.has_key?(key) params[key].push(value) @@ -940,7 +940,7 @@ class CGI module QueryExtension %w[ CONTENT_LENGTH SERVER_PORT ].each do |env| - define_method(env.sub(/^HTTP_/n, '').downcase) do + define_method(env.sub(/^HTTP_/, '').downcase) do (val = env_table[env]) && Integer(val) end end @@ -953,7 +953,7 @@ class CGI HTTP_ACCEPT HTTP_ACCEPT_CHARSET HTTP_ACCEPT_ENCODING HTTP_ACCEPT_LANGUAGE HTTP_CACHE_CONTROL HTTP_FROM HTTP_HOST HTTP_NEGOTIATE HTTP_PRAGMA HTTP_REFERER HTTP_USER_AGENT ].each do |env| - define_method(env.sub(/^HTTP_/n, '').downcase) do + define_method(env.sub(/^HTTP_/, '').downcase) do env_table[env] end end @@ -1004,9 +1004,9 @@ class CGI head = nil body = MorphingBody.new - until head and /#{quoted_boundary}(?:#{EOL}|--)/n.match(buf) - if (not head) and /#{EOL}#{EOL}/n.match(buf) - buf = buf.sub(/\A((?:.|\n)*?#{EOL})#{EOL}/n) do + until head and /#{quoted_boundary}(?:#{EOL}|--)/.match(buf) + if (not head) and /#{EOL}#{EOL}/.match(buf) + buf = buf.sub(/\A((?:.|\n)*?#{EOL})#{EOL}/) do head = $1.dup "" end @@ -1030,7 +1030,7 @@ class CGI content_length -= c.bytesize end - buf = buf.sub(/\A((?:.|\n)*?)(?:[\r\n]{1,2})?#{quoted_boundary}([\r\n]{1,2}|--)/n) do + buf = buf.sub(/\A((?:.|\n)*?)(?:[\r\n]{1,2})?#{quoted_boundary}([\r\n]{1,2}|--)/) do body.print $1 if "--" == $2 content_length = -1 @@ -1041,15 +1041,15 @@ class CGI body.rewind - /Content-Disposition:.* filename=(?:"((?:\\.|[^\"])*)"|([^;\s]*))/ni.match(head) + /Content-Disposition:.* filename=(?:"((?:\\.|[^\"])*)"|([^;\s]*))/i.match(head) filename = ($1 or $2 or "") - if /Mac/ni.match(env_table['HTTP_USER_AGENT']) and - /Mozilla/ni.match(env_table['HTTP_USER_AGENT']) and - (not /MSIE/ni.match(env_table['HTTP_USER_AGENT'])) + if /Mac/i.match(env_table['HTTP_USER_AGENT']) and + /Mozilla/i.match(env_table['HTTP_USER_AGENT']) and + (not /MSIE/i.match(env_table['HTTP_USER_AGENT'])) filename = CGI::unescape(filename) end - /Content-Type: ([^\s]*)/ni.match(head) + /Content-Type: ([^\s]*)/i.match(head) content_type = ($1 or "") (class << body; self; end).class_eval do @@ -1058,7 +1058,7 @@ class CGI define_method(:content_type) {content_type.dup.taint} end - /Content-Disposition:.* name="?([^\";\s]*)"?/ni.match(head) + /Content-Disposition:.* name="?([^\";\s]*)"?/i.match(head) name = ($1 || "").dup if params.has_key?(name) @@ -1087,12 +1087,12 @@ class CGI %|(offline mode: enter name=value pairs on standard input)\n| ) end - readlines.join(' ').gsub(/\n/n, '') - end.gsub(/\\=/n, '%3D').gsub(/\\&/n, '%26') + readlines.join(' ').gsub(/\n/, '') + end.gsub(/\\=/, '%3D').gsub(/\\&/, '%26') words = Shellwords.shellwords(string) - if words.find{|x| /=/n.match(x) } + if words.find{|x| /=/.match(x) } words.join('&') else words.join('+') @@ -1159,7 +1159,7 @@ class CGI # Reads query parameters in the @params field, and cookies into @cookies. def initialize_query() if ("POST" == env_table['REQUEST_METHOD']) and - %r|\Amultipart/form-data.*boundary=\"?([^\";,]+)\"?|n.match(env_table['CONTENT_TYPE']) + %r|\Amultipart/form-data.*boundary=\"?([^\";,]+)\"?|.match(env_table['CONTENT_TYPE']) boundary = $1.dup @multipart = true @params = read_multipart(boundary, Integer(env_table['CONTENT_LENGTH'])) @@ -1245,14 +1245,14 @@ class CGI # # </HTML> # def CGI::pretty(string, shift = " ") - lines = string.gsub(/(?!\A)<(?:.|\n)*?>/n, "\n\\0").gsub(/<(?:.|\n)*?>(?!\n)/n, "\\0\n") + lines = string.gsub(/(?!\A)<(?:.|\n)*?>/, "\n\\0").gsub(/<(?:.|\n)*?>(?!\n)/, "\\0\n") end_pos = 0 - while end_pos = lines.index(/^<\/(\w+)/n, end_pos) + while end_pos = lines.index(/^<\/(\w+)/, end_pos) element = $1.dup - start_pos = lines.rindex(/^\s*<#{element}/ni, end_pos) - lines[start_pos ... end_pos] = "__" + lines[start_pos ... end_pos].gsub(/\n(?!\z)/n, "\n" + shift) + "__" + start_pos = lines.rindex(/^\s*<#{element}/i, end_pos) + lines[start_pos ... end_pos] = "__" + lines[start_pos ... end_pos].gsub(/\n(?!\z)/, "\n" + shift) + "__" end - lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/n, '\1') + lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1') end @@ -158,8 +158,8 @@ get_pid(void) * call-seq: * Process.ppid => fixnum * - * Returns the process id of the parent of this process. Always - * returns 0 on NT. Not available on all platforms. + * Returns the process id of the parent of this process. Returns + * untrustworthy value on Win32/64. Not available on all platforms. * * puts "I am #{Process.pid}" * Process.fork { puts "Dad is #{Process.ppid}" } @@ -2243,15 +2243,13 @@ rb_reg_preprocess_dregexp(VALUE ary) onig_errmsg_buffer err = ""; int i; VALUE result = 0; - int argc = RARRAY_LEN(ary); - VALUE *argv = RARRAY_PTR(ary); - if (argc == 0) { + if (RARRAY_LEN(ary) == 0) { rb_raise(rb_eArgError, "no arguments given"); } - for (i = 0; i < argc; i++) { - VALUE str = argv[i]; + for (i = 0; i < RARRAY_LEN(ary); i++) { + VALUE str = RARRAY_PTR(ary)[i]; VALUE buf; char *p, *end; rb_encoding *src_enc; diff --git a/test/cgi/test_cgi_tag_helper.rb b/test/cgi/test_cgi_tag_helper.rb index 2651d14a8c..3ff2f54d42 100755 --- a/test/cgi/test_cgi_tag_helper.rb +++ b/test/cgi/test_cgi_tag_helper.rb @@ -3,7 +3,7 @@ require 'cgi' require 'stringio' -class CGICoreTest < Test::Unit::TestCase +class CGITagHelperTest < Test::Unit::TestCase def setup diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb index 514cac568c..1b2586a040 100644 --- a/test/ruby/test_econv.rb +++ b/test/ruby/test_econv.rb @@ -27,20 +27,24 @@ class TestEncodingConverter < Test::Unit::TestCase ec.primitive_errinfo) end - def test_s_stateless_encoding - assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.stateless_encoding("ISO-2022-JP")) - assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.stateless_encoding(Encoding::ISO_2022_JP)) - assert_nil(Encoding::Converter.stateless_encoding("EUC-JP")) - assert_nil(Encoding::Converter.stateless_encoding("UTF-8")) - assert_nil(Encoding::Converter.stateless_encoding("UTF-16BE")) - assert_nil(Encoding::Converter.stateless_encoding(Encoding::UTF_8)) - assert_nil(Encoding::Converter.stateless_encoding("xml-attr-escaped")) - end - - def test_stateless_encoding_iso2022jp - slenc = Encoding::Converter.stateless_encoding("ISO-2022-JP") + def test_s_asciicompat_encoding + assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.asciicompat_encoding("ISO-2022-JP")) + assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.asciicompat_encoding(Encoding::ISO_2022_JP)) + assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-16BE")) + assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-16LE")) + assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-32BE")) + assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding("UTF-32LE")) + assert_nil(Encoding::Converter.asciicompat_encoding("EUC-JP")) + assert_nil(Encoding::Converter.asciicompat_encoding("UTF-8")) + assert_nil(Encoding::Converter.asciicompat_encoding(Encoding::UTF_8)) + assert_nil(Encoding::Converter.asciicompat_encoding("xml-attr-escaped")) + assert_nil(Encoding::Converter.asciicompat_encoding("encoding-not-exist")) + end + + def test_asciicompat_encoding_iso2022jp + acenc = Encoding::Converter.asciicompat_encoding("ISO-2022-JP") str = "\e$B~~\(B".force_encoding("iso-2022-jp") - str2 = str.encode(slenc) + str2 = str.encode(acenc) str3 = str.encode("ISO-2022-JP") assert_equal(str, str3) end @@ -798,4 +802,9 @@ class TestEncodingConverter < Test::Unit::TestCase assert_equal("&\u3046\u2661&\"'".force_encoding("utf-8"), "&\u3046\u2661&\"'".encode("utf-8", xml: :text)) end + + def test_iso2022jp_invalid_replace + assert_equal("?x".force_encoding("iso-2022-jp"), + "\222\xA1x".encode("iso-2022-jp", "stateless-iso-2022-jp", :invalid => :replace)) + end end diff --git a/test/ruby/test_io.rb b/test/ruby/test_io.rb index 6b2dc3102d..35f575aa13 100644 --- a/test/ruby/test_io.rb +++ b/test/ruby/test_io.rb @@ -636,6 +636,10 @@ class TestIO < Test::Unit::TestCase assert_equal("", f2.read) end + proc do + open(__FILE__) # see Bug #493 [ruby-dev:35957] + end.call + pipe2 do |r, w| assert_raise(Errno::EMFILE, Errno::ENFILE, Errno::ENOMEM) do r2, w2 = r.dup, w.dup diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index f0111243ce..d7a4aca99c 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -546,6 +546,7 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u005C", "\x5C", "eucJP-ms") check_both_ways("\u005C", "\x5C", "CP51932") check_both_ways("\u005C", "\x5C", "ISO-2022-JP") + assert_equal("\u005C", "\e(B\x5C\e(B".encode("UTF-8", "ISO-2022-JP")) assert_equal("\u005C", "\e(J\x5C\e(B".encode("UTF-8", "ISO-2022-JP")) assert_raise(Encoding::ConversionUndefined) { "\u00A5".encode("Shift_JIS") } assert_raise(Encoding::ConversionUndefined) { "\u00A5".encode("Windows-31J") } @@ -568,6 +569,7 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u007E", "\x7E", "eucJP-ms") check_both_ways("\u007E", "\x7E", "CP51932") check_both_ways("\u007E", "\x7E", "ISO-2022-JP") + assert_equal("\u007E", "\e(B\x7E\e(B".encode("UTF-8", "ISO-2022-JP")) assert_equal("\u007E", "\e(J\x7E\e(B".encode("UTF-8", "ISO-2022-JP")) assert_raise(Encoding::ConversionUndefined) { "\u203E".encode("Shift_JIS") } assert_raise(Encoding::ConversionUndefined) { "\u203E".encode("Windows-31J") } diff --git a/tool/transcode-tblgen.rb b/tool/transcode-tblgen.rb index 2eb4a01565..5395460ee5 100644 --- a/tool/transcode-tblgen.rb +++ b/tool/transcode-tblgen.rb @@ -157,27 +157,28 @@ end class ArrayCode def initialize(type, name) - @code = <<"End" -static const #{type} -#{name}[0] = { -}; -End + @type = type + @name = name + @len = 0; + @content = '' end def length - @code[/\[\d+\]/][1...-1].to_i + @len end def insert_at_last(num, str) newnum = self.length + num - @code.sub!(/^(\};\n\z)/) { - str + $1 - } - @code.sub!(/\[\d+\]/) { "[#{newnum}]" } + @content << str + @len += num end def to_s - @code.dup + <<"End" +static const #{@type} +#{@name}[#{@len}] = { +#{@content}}; +End end end @@ -633,7 +634,7 @@ static const rb_transcoder #{input_unit_length}, /* input_unit_length */ #{max_input}, /* max_input */ #{max_output}, /* max_output */ - stateless_converter, /* stateful_type */ + asciicompat_converter, /* asciicompat_type */ 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, NULL, NULL, NULL, NULL diff --git a/transcode.c b/transcode.c index 2fa5309ea5..fd41001689 100644 --- a/transcode.c +++ b/transcode.c @@ -121,7 +121,6 @@ struct rb_econv_t { const unsigned char *error_bytes_start; size_t error_bytes_len; size_t readagain_len; - int partial_input; } last_error; /* The following fields are only for Encoding::Converter. @@ -837,7 +836,6 @@ rb_econv_open_by_transcoder_entries(int n, transcoder_entry_t **entries) ec->last_error.error_bytes_start = NULL; ec->last_error.error_bytes_len = 0; ec->last_error.readagain_len = 0; - ec->last_error.partial_input = 0; ec->source_encoding = NULL; ec->destination_encoding = NULL; for (i = 0; i < ec->num_trans; i++) { @@ -1274,7 +1272,6 @@ rb_econv_convert0(rb_econv_t *ec, gotresult: ec->last_error.result = res; - ec->last_error.partial_input = flags & ECONV_PARTIAL_INPUT; if (res == econv_invalid_byte_sequence || res == econv_incomplete_input || res == econv_undefined_conversion) { @@ -1414,7 +1411,7 @@ rb_econv_encoding_to_insert_output(rb_econv_t *ec) tr = tc->transcoder; - if (tr->stateful_type == stateful_encoder) + if (tr->asciicompat_type == asciicompat_encoder) return tr->src_encoding; return tr->dst_encoding; } @@ -1528,7 +1525,7 @@ rb_econv_insert_output(rb_econv_t *ec, data_end_p = &ec->in_data_end; buf_end_p = &ec->in_buf_end; } - else if (tc->transcoder->stateful_type == stateful_encoder) { + else if (tc->transcoder->asciicompat_type == asciicompat_encoder) { need += tc->readagain_len; if (need < insert_len) goto fail; @@ -1578,13 +1575,13 @@ rb_econv_insert_output(rb_econv_t *ec, } } - if (tc && tc->transcoder->stateful_type == stateful_encoder) { + memcpy(*data_end_p, insert_str, insert_len); + *data_end_p += insert_len; + if (tc && tc->transcoder->asciicompat_type == asciicompat_encoder) { memcpy(*data_end_p, TRANSCODING_READBUF(tc)+tc->recognized_len, tc->readagain_len); *data_end_p += tc->readagain_len; tc->readagain_len = 0; } - memcpy(*data_end_p, insert_str, insert_len); - *data_end_p += insert_len; if (insert_str != str && insert_str != insert_buf) xfree((void*)insert_str); @@ -1633,43 +1630,53 @@ rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n) tc->readagain_len -= n; } -struct stateless_encoding_t { - const char *stateless_enc; - const char *stateful_enc; +struct asciicompat_encoding_t { + const char *ascii_compat_name; + const char *ascii_incompat_name; }; static int -stateless_encoding_i(st_data_t key, st_data_t val, st_data_t arg) +asciicompat_encoding_i(st_data_t key, st_data_t val, st_data_t arg) { - struct stateless_encoding_t *data = (struct stateless_encoding_t *)arg; - st_table *table2 = (st_table *)val; - st_data_t v; + struct asciicompat_encoding_t *data = (struct asciicompat_encoding_t *)arg; + transcoder_entry_t *entry = (transcoder_entry_t *)val; + const rb_transcoder *tr; - if (st_lookup(table2, (st_data_t)data->stateful_enc, &v)) { - transcoder_entry_t *entry = (transcoder_entry_t *)v; - const rb_transcoder *tr; - if (SUPPLEMENTAL_CONVERSION(entry->sname, entry->dname)) { - return ST_CONTINUE; - } - tr = load_transcoder_entry(entry); - if (tr && tr->stateful_type == stateful_encoder) { - data->stateless_enc = tr->src_encoding; - return ST_STOP; - } + if (SUPPLEMENTAL_CONVERSION(entry->sname, entry->dname)) + return ST_CONTINUE; + tr = load_transcoder_entry(entry); + if (tr && tr->asciicompat_type == asciicompat_decoder) { + data->ascii_compat_name = tr->dst_encoding; + return ST_STOP; } return ST_CONTINUE; } const char * -rb_econv_stateless_encoding(const char *stateful_enc) -{ - struct stateless_encoding_t data; - data.stateful_enc = stateful_enc; - data.stateless_enc = NULL; - st_foreach(transcoder_table, stateless_encoding_i, (st_data_t)&data); - if (data.stateless_enc) - return data.stateless_enc; - return NULL; +rb_econv_asciicompat_encoding(const char *ascii_incompat_name) +{ + st_data_t v; + st_table *table2; + struct asciicompat_encoding_t data; + + if (!st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &v)) + return NULL; + table2 = (st_table *)v; + + /* + * Assumption: + * There are at most one transcoder for + * converting from ASCII incompatible encoding. + * + * For ISO-2022-JP, there is ISO-2022-JP -> stateless-ISO-2022-JP and no others. + */ + if (table2->num_entries != 1) + return NULL; + + data.ascii_incompat_name = ascii_incompat_name; + data.ascii_compat_name = NULL; + st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data); + return data.ascii_compat_name; } VALUE @@ -2510,42 +2517,42 @@ make_dummy_encoding(const char *name) /* * call-seq: - * Encoding::Converter.stateless_encoding(string) => encoding or nil - * Encoding::Converter.stateless_encoding(encoding) => encoding or nil - * - * returns the corresponding stateless encoding. + * Encoding::Converter.asciicompat_encoding(string) => encoding or nil + * Encoding::Converter.asciicompat_encoding(encoding) => encoding or nil * - * It returns nil if the argument is not a stateful encoding. + * returns the corresponding ASCII compatible encoding. * - * "corresponding stateless encoding" is a stateless encoding which - * represents same characters in the statefull encoding. + * It returns nil if the argument is an ASCII compatible encoding. * - * So, no conversion undefined error occur between the stateful encoding and the stateless encoding. + * "corresponding ASCII compatible encoding" is a ASCII compatible encoding which + * represents same characters in the given ASCII incompatible encoding. * - * For ISO-2022-JP, the dedicated stateless encoding, stateless-ISO-2022-JP, is defined. + * So, no conversion undefined error occur between the ASCII compatible and incompatible encoding. * * Encoding::Converter.stateless_encoding("ISO-2022-JP") #=> #<Encoding:stateless-ISO-2022-JP> + * Encoding::Converter.stateless_encoding("UTF-16BE") #=> #<Encoding:UTF-8> + * Encoding::Converter.stateless_encoding("UTF-8") #=> nil * */ static VALUE -econv_s_stateless_encoding(VALUE klass, VALUE arg) +econv_s_asciicompat_encoding(VALUE klass, VALUE arg) { - const char *stateful_name, *stateless_name; - rb_encoding *stateful_enc, *stateless_enc; + const char *arg_name, *result_name; + rb_encoding *arg_enc, *result_enc; - enc_arg(arg, &stateful_name, &stateful_enc); + enc_arg(arg, &arg_name, &arg_enc); - stateless_name = rb_econv_stateless_encoding(stateful_name); + result_name = rb_econv_asciicompat_encoding(arg_name); - if (stateless_name == NULL) + if (result_name == NULL) return Qnil; - stateless_enc = rb_enc_find(stateless_name); + result_enc = rb_enc_find(result_name); - if (!stateless_enc) - stateless_enc = make_dummy_encoding(stateless_name); + if (!result_enc) + result_enc = make_dummy_encoding(result_name); - return rb_enc_from_encoding(stateless_enc); + return rb_enc_from_encoding(result_enc); } /* @@ -3570,7 +3577,7 @@ InitVM_transcode(rb_vm_t *vm) rb_cEncodingConverter = rb_define_class_under(rb_cEncoding, "Converter", rb_cData); rb_define_alloc_func(rb_cEncodingConverter, econv_s_allocate); - rb_define_singleton_method(rb_cEncodingConverter, "stateless_encoding", econv_s_stateless_encoding, 1); + rb_define_singleton_method(rb_cEncodingConverter, "asciicompat_encoding", econv_s_asciicompat_encoding, 1); rb_define_method(rb_cEncodingConverter, "initialize", econv_init, -1); rb_define_method(rb_cEncodingConverter, "inspect", econv_inspect, 0); rb_define_method(rb_cEncodingConverter, "source_encoding", econv_source_encoding, 0); diff --git a/transcode_data.h b/transcode_data.h index 4587815c67..98f48a4de9 100644 --- a/transcode_data.h +++ b/transcode_data.h @@ -57,11 +57,11 @@ #define THREETRAIL /* legal but undefined if three more trailing UTF-8 */ typedef enum { - stateless_converter, /* stateless -> stateless */ - stateful_decoder, /* stateful -> stateless */ - stateful_encoder /* stateless -> stateful */ - /* stateful -> stateful is intentionally ommitted. */ -} rb_transcoder_stateful_type_t; + asciicompat_converter, /* ASCII-compatible -> ASCII-compatible */ + asciicompat_decoder, /* ASCII-incompatible -> ASCII-compatible */ + asciicompat_encoder /* ASCII-compatible -> ASCII-incompatible */ + /* ASCII-incompatible -> ASCII-incompatible is intentionally ommitted. */ +} rb_transcoder_asciicompat_type_t; typedef struct rb_transcoder rb_transcoder; @@ -78,7 +78,7 @@ struct rb_transcoder { int input_unit_length; int max_input; int max_output; - rb_transcoder_stateful_type_t stateful_type; + rb_transcoder_asciicompat_type_t asciicompat_type; size_t state_size; int (*state_init_func)(void*); /* ret==0:success ret!=0:failure(errno) */ int (*state_fini_func)(void*); /* ret==0:success ret!=0:failure(errno) */ @@ -1808,6 +1808,7 @@ InitVM_VM(rb_vm_t *vm) rb_define_method_id(klass, id_core_define_singleton_method, m_core_define_singleton_method, 3); rb_define_method_id(klass, id_core_set_postexe, m_core_set_postexe, 1); rb_obj_freeze(fcore); + rb_global_variable(&rb_mRubyVMFrozenCore); rb_mRubyVMFrozenCore = fcore; /* ::VM::Env */ diff --git a/win32/Makefile.sub b/win32/Makefile.sub index 6ae4bca95a..1e394eeceb 100644 --- a/win32/Makefile.sub +++ b/win32/Makefile.sub @@ -355,6 +355,9 @@ $(CONFIG_H): $(MKFILES) $(srcdir)/win32/Makefile.sub $(win_srcdir)/Makefile.sub #define SIZEOF_SIZE_T 4 #define SIZEOF_PTRDIFF_T 4 !endif +!if $(MSC_VER) < 1400 +#define SIZE_MAX UINT_MAX +!endif #define HAVE_PROTOTYPES 1 #define TOKEN_PASTE(x,y) x##y #define HAVE_STDARG_PROTOTYPES 1 diff --git a/win32/win32.c b/win32/win32.c index 5ba7557ed0..5774d0c503 100644 --- a/win32/win32.c +++ b/win32/win32.c @@ -3440,7 +3440,6 @@ rb_w32_fstati64(int fd, struct stati64 *st) static time_t filetime_to_unixtime(const FILETIME *ft) { - FILETIME loc; struct timeval tv; if (filetime_to_timeval(ft, &tv) == (time_t)-1) @@ -3966,21 +3965,21 @@ rb_w32_getppid(void) HANDLE hNtDll = GetModuleHandle("ntdll.dll"); if (hNtDll) { pNtQueryInformationProcess = (long (WINAPI *)(HANDLE, int, void *, ULONG, ULONG *))GetProcAddress(hNtDll, "NtQueryInformationProcess"); - if (pNtQueryInformationProcess) { - struct { - long ExitStatus; - void* PebBaseAddress; - ULONG AffinityMask; - ULONG BasePriority; - ULONG UniqueProcessId; - ULONG ParentProcessId; - } pbi; - ULONG len; - long ret = pNtQueryInformationProcess(GetCurrentProcess(), 0, &pbi, sizeof(pbi), &len); - if (!ret) { - ppid = pbi.ParentProcessId; - } - } + } + } + if (pNtQueryInformationProcess) { + struct { + long ExitStatus; + void* PebBaseAddress; + ULONG AffinityMask; + ULONG BasePriority; + ULONG UniqueProcessId; + ULONG ParentProcessId; + } pbi; + ULONG len; + long ret = pNtQueryInformationProcess(GetCurrentProcess(), 0, &pbi, sizeof(pbi), &len); + if (!ret) { + ppid = pbi.ParentProcessId; } } } |