diff options
author | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-12-06 09:28:26 +0000 |
---|---|---|
committer | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-12-06 09:28:26 +0000 |
commit | 69406aad505414de34dc8b560ac1eadf147b0dbc (patch) | |
tree | 161d7248925c7bd2c99d3ed6a341e81ba76b40c4 /include/ruby | |
parent | de4ec689910c07a48b81083adc3130b6b6023be3 (diff) | |
download | ruby-69406aad505414de34dc8b560ac1eadf147b0dbc.tar.gz |
* encoding.c (rb_enc_precise_mbclen): new function for mbclen with
  validation.
* include/ruby/encoding.h (rb_enc_precise_mbclen): declared.
  (MBCLEN_CHARFOUND): new macro.
  (MBCLEN_INVALID): new macro.
  (MBCLEN_NEEDMORE): new macro.
* include/ruby/oniguruma.h (OnigEncodingTypeST): replace mbc_enc_len
  by precise_mbc_enc_len.
  (ONIGENC_PRECISE_MBC_ENC_LEN): new macro.
  (ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND): new macro.
  (ONIGENC_CONSTRUCT_MBCLEN_INVALID): new macro.
  (ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE): new macro.
  (ONIGENC_MBCLEN_CHARFOUND): new macro.
  (ONIGENC_MBCLEN_INVALID): new macro.
  (ONIGENC_MBCLEN_NEEDMORE): new macro.
  (ONIGENC_MBC_ENC_LEN): use ONIGENC_PRECISE_MBC_ENC_LEN.
* enc/euc_jp.c: validation implemented.
* enc/sjis.c: ditto.
* enc/utf8.c: ditto.
* string.c (rb_str_inspect): use rb_enc_precise_mbclen for invalid
  encoding.
  (rb_str_valid_encoding_p): new method String#valid_encoding?.
* io.c (rb_io_getc): use rb_enc_precise_mbclen.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14119 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'include/ruby')
-rw-r--r-- | include/ruby/encoding.h | 8 | ||||
-rw-r--r-- | include/ruby/oniguruma.h | 29 |
2 files changed, 34 insertions, 3 deletions
```diff
diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h
index bdef191338..a800f54274 100644
--- a/include/ruby/encoding.h
+++ b/include/ruby/encoding.h
@@ -68,9 +68,15 @@ rb_encoding * rb_enc_find(const char *name);
 #define rb_enc_mbminlen(enc) (enc)->min_enc_len
 #define rb_enc_mbmaxlen(enc) (enc)->max_enc_len
 
-/* ptr,encoding -> mbclen */
+/* ptr,endptr,encoding -> mbclen */
 int rb_enc_mbclen(const char*, const char *, rb_encoding*);
 
+/* ptr,endptr,encoding -> chlen, invalid or needmore */
+int rb_enc_precise_mbclen(const char*, const char *, rb_encoding*);
+#define MBCLEN_CHARFOUND(ret) ONIGENC_MBCLEN_CHARFOUND(ret)
+#define MBCLEN_INVALID(ret) ONIGENC_MBCLEN_INVALID(ret)
+#define MBCLEN_NEEDMORE(ret) ONIGENC_MBCLEN_NEEDMORE(ret)
+
 /* code,encoding -> codelen */
 int rb_enc_codelen(int, rb_encoding*);
 
diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h
index 0a1f614bdb..df4d07b115 100644
--- a/include/ruby/oniguruma.h
+++ b/include/ruby/oniguruma.h
@@ -144,7 +144,7 @@ typedef struct {
 typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
 
 typedef struct OnigEncodingTypeST {
-  int (*mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc);
+  int (*precise_mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc);
   const char* name;
   int max_enc_len;
   int min_enc_len;
@@ -282,7 +282,32 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
 #define ONIGENC_STEP_BACK(enc,start,s,n) \
         onigenc_step_back((enc),(start),(s),(n))
 
-#define ONIGENC_MBC_ENC_LEN(enc,p,e) (enc)->mbc_enc_len(p,e,enc)
+
+#define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n)
+#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1)
+#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-n)
+
+static inline int onigenc_mbclen_charfound(int r) { return 0 < r ? r : 0; }
+static inline int onigenc_mbclen_needmore(int r) { return r < -1 ? -1 - r : 0; }
+#define ONIGENC_MBCLEN_CHARFOUND(r) onigenc_mbclen_charfound(r)
+#define ONIGENC_MBCLEN_INVALID(r) ((r) == -1)
+#define ONIGENC_MBCLEN_NEEDMORE(r) onigenc_mbclen_needmore(r)
+
+#define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc)
+
+static inline int onigenc_mbclen_recover(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
+{
+  int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
+  int r;
+  if (ONIGENC_MBCLEN_INVALID(ret))
+    return 1;
+  else if ((r = ONIGENC_MBCLEN_NEEDMORE(ret)))
+    return e-p+r;
+  else
+    return ONIGENC_MBCLEN_CHARFOUND(ret);
+}
+
+#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_recover(p,e,enc)
 #define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
 #define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
 #define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
```