diff options
author | matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-08-25 03:29:39 +0000 |
---|---|---|
committer | matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-08-25 03:29:39 +0000 |
commit | a25fbe3b3e531bbe479f344af24eaf9d2eeae6ea (patch) | |
tree | 055e58ed569fb28012fadade94f518e0a888e47d /ext/stringio | |
parent | 0ada813abfe3a049da29bd423ba34606a00777bd (diff) | |
download | ruby-a25fbe3b3e531bbe479f344af24eaf9d2eeae6ea.tar.gz |
* encoding.c: provide basic features for M17N.
* parse.y: encoding aware parsing.
* parse.y (pragma_encoding): encoding specification pragma.
* parse.y (rb_intern3): encoding specified symbols.
* string.c (rb_str_length): length is now counted in characters.
the bytesize method is added for the older, byte-based behavior.
* string.c (rb_str_index_m): index based on characters. rindex as
well.
* string.c (succ_char): encoding aware succeeding string.
* string.c (rb_str_reverse): reverse based on characters.
* string.c (rb_str_inspect): encoding aware string description.
* string.c (rb_str_upcase_bang): encoding aware case conversion.
downcase, capitalize, swapcase as well.
* string.c (rb_str_tr_bang): tr based on characters. delete,
squeeze, tr_s, count as well.
* string.c (rb_str_split_m): split based on characters.
* string.c (rb_str_each_line): encoding aware each_line.
* string.c (rb_str_each_char): added. iteration based on
characters.
* string.c (rb_str_strip_bang): encoding aware whitespace
stripping. lstrip, rstrip as well.
* string.c (rb_str_justify): encoding aware justifying (ljust,
rjust, center).
* string.c (str_encoding): get encoding attribute from a string.
* re.c (rb_reg_initialize): encoding aware regular expressions.
* sprintf.c (rb_str_format): formatting (i.e. length count) based
on characters.
* io.c (rb_io_getc): getc now returns a one-character string.
the getbyte method is added for the older behavior.
* ext/stringio/stringio.c (strio_getc): ditto.
* io.c (rb_io_ungetc): allow pushing arbitrary string at the
current reading point.
* ext/stringio/stringio.c (strio_ungetc): ditto.
* ext/strscan/strscan.c: encoding support.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13261 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'ext/stringio')
-rw-r--r-- | ext/stringio/stringio.c | 79 |
1 file changed, 49 insertions, 30 deletions
diff --git a/ext/stringio/stringio.c b/ext/stringio/stringio.c index 480c9ed378..08a6341b70 100644 --- a/ext/stringio/stringio.c +++ b/ext/stringio/stringio.c @@ -13,7 +13,7 @@ **********************************************************************/ #include "ruby.h" -#include "rubyio.h" +#include "ruby/io.h" #if defined(HAVE_FCNTL_H) || defined(_WIN32) #include <fcntl.h> #elif defined(HAVE_SYS_FCNTL_H) @@ -84,6 +84,18 @@ get_strio(VALUE self) return ptr; } +static VALUE +strio_substr(struct StringIO *ptr, int pos, int len) +{ + VALUE str = ptr->string; + rb_encoding *enc = rb_enc_get(str); + int rlen = RSTRING_LEN(str) - pos; + + if (len > rlen) len = rlen; + if (len < 0) len = 0; + return rb_enc_str_new(RSTRING_PTR(str)+pos, len, enc); +} + #define StringIO(obj) get_strio(obj) #define CLOSED(ptr) (!((ptr)->flags & FMODE_READWRITE)) @@ -603,7 +615,7 @@ strio_each_byte(VALUE self) /* * call-seq: - * strio.getc -> fixnum or nil + * strio.getc -> string or nil * * See IO#getc. */ @@ -611,15 +623,17 @@ static VALUE strio_getc(VALUE self) { struct StringIO *ptr = readable(StringIO(self)); - int c; - char ch; + rb_encoding *enc = rb_enc_get(ptr->string); + int len; + char *p; if (ptr->pos >= RSTRING_LEN(ptr->string)) { return Qnil; } - c = RSTRING_PTR(ptr->string)[ptr->pos++]; - ch = c & 0xff; - return rb_str_new(&ch, 1); + p = RSTRING_PTR(ptr->string)+ptr->pos; + len = rb_enc_mbclen(p, enc); + ptr->pos += len; + return rb_enc_str_new(p, len, rb_enc_get(ptr->string)); } /* @@ -671,30 +685,34 @@ static VALUE strio_ungetc(VALUE self, VALUE c) { struct StringIO *ptr = readable(StringIO(self)); - int cc; - long len, pos = ptr->pos; + long lpos, clen; + char *p, *pend; + rb_encoding *enc; if (NIL_P(c)) return Qnil; if (FIXNUM_P(c)) { - cc = FIX2INT(c); + int cc = FIX2INT(c); + char buf[16]; + + enc = rb_enc_get(ptr->string); + rb_enc_mbcput(cc, buf, enc); + c = rb_enc_str_new(buf, rb_enc_codelen(cc, enc), enc); } else { SafeStringValue(c); - if (RSTRING_LEN(c) > 1) { - 
rb_warn("IO#ungetc pushes back only one byte"); - } - cc = (unsigned char)RSTRING_PTR(c)[0]; + enc = rb_enc_check(ptr->string, c); } - if (cc != EOF && pos > 0) { - if ((len = RSTRING_LEN(ptr->string)) < pos-- || - (unsigned char)RSTRING_PTR(ptr->string)[pos] != - (unsigned char)cc) { - strio_extend(ptr, pos, 1); - RSTRING_PTR(ptr->string)[pos] = cc; - OBJ_INFECT(ptr->string, self); - } - --ptr->pos; + /* get logical position */ + lpos = 0; p = RSTRING_PTR(ptr->string); pend = p + ptr->pos - 1; + for (;;) { + clen = rb_enc_mbclen(p, enc); + if (p+clen >= pend) break; + p += clen; + lpos++; } + rb_str_update(ptr->string, lpos, ptr->pos ? 1 : 0, c); + ptr->pos = p - RSTRING_PTR(ptr->string); + return Qnil; } @@ -800,7 +818,7 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr) e = s + limit; } if (NIL_P(str)) { - str = rb_str_substr(ptr->string, ptr->pos, e - s); + str = strio_substr(ptr, ptr->pos, e - s); } else if ((n = RSTRING_LEN(str)) == 0) { p = s; @@ -816,13 +834,13 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr) break; } } - str = rb_str_substr(ptr->string, s - RSTRING_PTR(ptr->string), e - s); + str = strio_substr(ptr, s - RSTRING_PTR(ptr->string), e - s); } else if (n == 1) { if ((p = memchr(s, RSTRING_PTR(str)[0], e - s)) != 0) { e = p + 1; } - str = rb_str_substr(ptr->string, ptr->pos, e - s); + str = strio_substr(ptr, ptr->pos, e - s); } else { if (n < e - s) { @@ -843,7 +861,7 @@ strio_getline(int argc, VALUE *argv, struct StringIO *ptr) } } } - str = rb_str_substr(ptr->string, ptr->pos, e - s); + str = strio_substr(ptr, ptr->pos, e - s); } ptr->pos = e - RSTRING_PTR(ptr->string); ptr->lineno++; @@ -944,7 +962,7 @@ strio_write(VALUE self, VALUE str) if (TYPE(str) != T_STRING) str = rb_obj_as_string(str); len = RSTRING_LEN(str); - if (!len) return INT2FIX(0); + if (len == 0) return INT2FIX(0); check_modifiable(ptr); olen = RSTRING_LEN(ptr->string); if (ptr->flags & FMODE_APPEND) { @@ -955,7 +973,8 @@ strio_write(VALUE self, VALUE 
str) } else { strio_extend(ptr, ptr->pos, len); - rb_str_update(ptr->string, ptr->pos, len, str); + memmove(RSTRING_PTR(ptr->string)+ptr->pos, RSTRING_PTR(str), len); + OBJ_INFECT(ptr->string, str); } OBJ_INFECT(ptr->string, self); ptr->pos += len; @@ -1070,7 +1089,7 @@ strio_read(int argc, VALUE *argv, VALUE self) rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc); } if (NIL_P(str)) { - str = rb_str_substr(ptr->string, ptr->pos, len); + str = strio_substr(ptr, ptr->pos, len); } else { long rest = RSTRING_LEN(ptr->string) - ptr->pos; |