diff options
author | unknown <bar@mysql.com> | 2005-12-09 16:37:58 +0400 |
---|---|---|
committer | unknown <bar@mysql.com> | 2005-12-09 16:37:58 +0400 |
commit | 7063bd4d2bfe4688db60b28a15843406299a58f0 (patch) | |
tree | 2868d73a9285634c6a4b999453f6fb27a07e03eb /strings | |
parent | 5aeb69296a4e134f0215da3e6bcce4956b7d76ad (diff) | |
download | mariadb-git-7063bd4d2bfe4688db60b28a15843406299a58f0.tar.gz |
Bug#15377 Valid multibyte sequences are truncated on INSERT
ctype-euc_kr.c:
ctype-gb2312.c:
Adding specific well_formed_length functions
for gb2312 and euckr, to allow storing characters
which are correct according to the character set
specifications but just don't have Unicode mapping.
Previously only those which have Unicode mapping
could be stored, while unassigned characters lead
to data truncation.
Many files:
new file
strings/ctype-gb2312.c:
Bug#15377 Valid multibyte sequences are truncated on INSERT
Adding specific well_formed_length functions
for gb2312 and euckr, to allow storing characters
which are correct according to the character set.
Previously only those which have Unicode mapping
could be stored.
strings/ctype-euc_kr.c:
Adding specific well_formed_length functions
for gb2312 and euckr, to allow storing characters
which are correct according to the character set.
Previously only those which have Unicode mapping
could be stored.
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-euc_kr.c | 37 | ||||
-rw-r--r-- | strings/ctype-gb2312.c | 37 |
2 files changed, 72 insertions, 2 deletions
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c index f15e97de5be..2863b192f50 100644 --- a/strings/ctype-euc_kr.c +++ b/strings/ctype-euc_kr.c @@ -8635,6 +8635,41 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)), } +/* + Returns well formed length of a EUC-KR string. +*/ +static uint +my_well_formed_len_euckr(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e, + uint pos, int *error) +{ + const char *b0= b; + const char *emb= e - 1; /* Last possible end of an MB character */ + + *error= 0; + while (pos-- && b < e) + { + if ((uchar) b[0] < 128) + { + /* Single byte ascii character */ + b++; + } + else if (b < emb && iseuc_kr(*b) && iseuc_kr(b[1])) + { + /* Double byte character */ + b+= 2; + } + else + { + /* Wrong byte sequence */ + *error= 1; + break; + } + } + return (uint) (b - b0); +} + + static MY_COLLATION_HANDLER my_collation_ci_handler = { NULL, /* init */ @@ -8655,7 +8690,7 @@ static MY_CHARSET_HANDLER my_charset_handler= mbcharlen_euc_kr, my_numchars_mb, my_charpos_mb, - my_well_formed_len_mb, + my_well_formed_len_euckr, my_lengthsp_8bit, my_numcells_8bit, my_mb_wc_euc_kr, /* mb_wc */ diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c index 0cbad2d1c55..52dd61a8462 100644 --- a/strings/ctype-gb2312.c +++ b/strings/ctype-gb2312.c @@ -5686,6 +5686,41 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)), } +/* + Returns well formed length of a GB2312 string. 
+*/ +static uint +my_well_formed_len_gb2312(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e, + uint pos, int *error) +{ + const char *b0= b; + const char *emb= e - 1; /* Last possible end of an MB character */ + + *error= 0; + while (pos-- && b < e) + { + if ((uchar) b[0] < 128) + { + /* Single byte ascii character */ + b++; + } + else if (b < emb && isgb2312head(*b) && isgb2312tail(b[1])) + { + /* Double byte character */ + b+= 2; + } + else + { + /* Wrong byte sequence */ + *error= 1; + break; + } + } + return (uint) (b - b0); +} + + static MY_COLLATION_HANDLER my_collation_ci_handler = { NULL, /* init */ @@ -5706,7 +5741,7 @@ static MY_CHARSET_HANDLER my_charset_handler= mbcharlen_gb2312, my_numchars_mb, my_charpos_mb, - my_well_formed_len_mb, + my_well_formed_len_gb2312, my_lengthsp_8bit, my_numcells_8bit, my_mb_wc_gb2312, /* mb_wc */ |