diff options
author | Alexander Barkov <bar@mariadb.org> | 2015-02-27 16:26:12 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.org> | 2015-02-27 16:26:12 +0400 |
commit | 72d7b12b9c9c5ceffef9fff3adc86c149f57f20f (patch) | |
tree | 7fd38d14cabf307e1ff2493af9497ba754afffba /sql/sql_string.cc | |
parent | 2d01907c1d6c65ff6d851dec032540f2ff57caae (diff) | |
download | mariadb-git-72d7b12b9c9c5ceffef9fff3adc86c149f57f20f.tar.gz |
Reducing duplicate code and simplifying well-formed string copying
by adding a new class, String_copier.
This is a prerequisite patch for MDEV-6566 and MDEV-6572,
intended to avoid adding more similar code.
Diffstat (limited to 'sql/sql_string.cc')
-rw-r--r-- | sql/sql_string.cc | 105 |
1 files changed, 54 insertions, 51 deletions
diff --git a/sql/sql_string.cc b/sql/sql_string.cc index a7bfa6c1455..5eb55463e85 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -875,41 +875,44 @@ my_copy_with_hex_escaping(CHARSET_INFO *cs, /* - copy a string, + Copy a string, with optional character set conversion, with optional left padding (for binary -> UCS2 conversion) - - SYNOPSIS - well_formed_copy_nchars() - to Store result here - to_length Maxinum length of "to" string - to_cs Character set of "to" string - from Copy from here - from_length Length of from string - from_cs From character set - nchars Copy not more that nchars characters - well_formed_error_pos Return position when "from" is not well formed + + In case if there is a Unicode conversion (i.e. to_cs and from_cs are + different character sets and both are not &my_charset_bin), bad input bytes + as well as characters that cannot be encoded in to_cs are replaced to '?'. + + In case of non-Unicode copying (i.e. to_cs and from_cs are same character set, + or from_cs is &my_charset_bin), the function stops on the first bad + byte sequence. + + The string that is written to "to" is always well-formed. + + @param to The destination string + @param to_length Space available in "to" + @param to_cs Character set of the "to" string + @param from The source string + @param from_length Length of the "from" string + @param from_cs Character set of the "from" string + @param nchars Copy not more than "nchars" characters + + The members as set as follows: + m_well_formed_error_pos To the position when "from" is not well formed or NULL otherwise. - cannot_convert_error_pos Return position where a not convertable + m_cannot_convert_error_pos To the position where a not convertable character met, or NULL otherwise. - from_end_pos Return position where scanning of "from" + m_source_end_pos To the position where scanning of the "from" string stopped. 
- NOTES - RETURN - length of bytes copied to 'to' + @returns number of bytes that were written to 'to' */ - - -uint32 -well_formed_copy_nchars(CHARSET_INFO *to_cs, - char *to, uint to_length, - CHARSET_INFO *from_cs, - const char *from, uint from_length, - uint nchars, - const char **well_formed_error_pos, - const char **cannot_convert_error_pos, - const char **from_end_pos) +uint +String_copier::well_formed_copy(CHARSET_INFO *to_cs, + char *to, uint to_length, + CHARSET_INFO *from_cs, + const char *from, uint from_length, + uint nchars) { uint res; @@ -920,9 +923,9 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, { if (to_length < to_cs->mbminlen || !nchars) { - *from_end_pos= from; - *cannot_convert_error_pos= NULL; - *well_formed_error_pos= NULL; + m_source_end_pos= from; + m_cannot_convert_error_pos= NULL; + m_well_formed_error_pos= NULL; return 0; } @@ -930,9 +933,9 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, { res= MY_MIN(MY_MIN(nchars, to_length), from_length); memmove(to, from, res); - *from_end_pos= from + res; - *well_formed_error_pos= NULL; - *cannot_convert_error_pos= NULL; + m_source_end_pos= from + res; + m_well_formed_error_pos= NULL; + m_cannot_convert_error_pos= NULL; } else { @@ -964,8 +967,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, &well_formed_error) != to_cs->mbminlen) { - *from_end_pos= *well_formed_error_pos= from; - *cannot_convert_error_pos= NULL; + m_source_end_pos= m_well_formed_error_pos= from; + m_cannot_convert_error_pos= NULL; return 0; } nchars--; @@ -979,9 +982,9 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, res= to_cs->cset->well_formed_len(to_cs, from, from + from_length, nchars, &well_formed_error); memmove(to, from, res); - *from_end_pos= from + res; - *well_formed_error_pos= well_formed_error ? from + res : NULL; - *cannot_convert_error_pos= NULL; + m_source_end_pos= from + res; + m_well_formed_error_pos= well_formed_error ? 
from + res : NULL; + m_cannot_convert_error_pos= NULL; if (from_offset) res+= to_cs->mbminlen; } @@ -995,8 +998,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, const uchar *from_end= (const uchar*) from + from_length; uchar *to_end= (uchar*) to + to_length; char *to_start= to; - *well_formed_error_pos= NULL; - *cannot_convert_error_pos= NULL; + m_well_formed_error_pos= NULL; + m_cannot_convert_error_pos= NULL; for ( ; nchars; nchars--) { @@ -1005,8 +1008,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, from+= cnvres; else if (cnvres == MY_CS_ILSEQ) { - if (!*well_formed_error_pos) - *well_formed_error_pos= from; + if (!m_well_formed_error_pos) + m_well_formed_error_pos= from; from++; wc= '?'; } @@ -1016,8 +1019,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, A correct multibyte sequence detected But it doesn't have Unicode mapping. */ - if (!*cannot_convert_error_pos) - *cannot_convert_error_pos= from; + if (!m_cannot_convert_error_pos) + m_cannot_convert_error_pos= from; from+= (-cnvres); wc= '?'; } @@ -1026,8 +1029,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, if ((uchar *) from >= from_end) break; // End of line // Incomplete byte sequence - if (!*well_formed_error_pos) - *well_formed_error_pos= from; + if (!m_well_formed_error_pos) + m_well_formed_error_pos= from; from++; wc= '?'; } @@ -1036,8 +1039,8 @@ outp: to+= cnvres; else if (cnvres == MY_CS_ILUNI && wc != '?') { - if (!*cannot_convert_error_pos) - *cannot_convert_error_pos= from_prev; + if (!m_cannot_convert_error_pos) + m_cannot_convert_error_pos= from_prev; wc= '?'; goto outp; } @@ -1047,10 +1050,10 @@ outp: break; } } - *from_end_pos= from; + m_source_end_pos= from; res= (uint) (to - to_start); } - return (uint32) res; + return res; } |