diff options
Diffstat (limited to 'sql')
-rw-r--r-- | sql/protocol.cc | 64 | ||||
-rw-r--r-- | sql/protocol.h | 2 | ||||
-rw-r--r-- | sql/sql_string.cc | 68 |
3 files changed, 127 insertions, 7 deletions
diff --git a/sql/protocol.cc b/sql/protocol.cc index 54e17ff5c3b..7abc051af68 100644 --- a/sql/protocol.cc +++ b/sql/protocol.cc @@ -58,6 +58,64 @@ bool Protocol_binary::net_store_data(const uchar *from, size_t length) } + + +/* + net_store_data() - extended version with character set conversion. + + It is optimized for short strings whose length after + conversion is garanteed to be less than 251, which accupies + exactly one byte to store length. It allows not to use + the "convert" member as a temporary buffer, conversion + is done directly to the "packet" member. + The limit 251 is good enough to optimize send_result_set_metadata() + because column, table, database names fit into this limit. +*/ + +#ifndef EMBEDDED_LIBRARY +bool Protocol::net_store_data(const uchar *from, size_t length, + CHARSET_INFO *from_cs, CHARSET_INFO *to_cs) +{ + uint dummy_errors; + /* Calculate maxumum possible result length */ + uint conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen; + if (conv_length > 250) + { + /* + For strings with conv_length greater than 250 bytes + we don't know how many bytes we will need to store length: one or two, + because we don't know result length until conversion is done. + For example, when converting from utf8 (mbmaxlen=3) to latin1, + conv_length=300 means that the result length can vary between 100 to 300. + length=100 needs one byte, length=300 needs to bytes. + + Thus conversion directly to "packet" is not worthy. + Let's use "convert" as a temporary buffer. + */ + return (convert->copy((const char*) from, length, from_cs, + to_cs, &dummy_errors) || + net_store_data((const uchar*) convert->ptr(), convert->length())); + } + + ulong packet_length= packet->length(); + ulong new_length= packet_length + conv_length + 1; + + if (new_length > packet->alloced_length() && packet->realloc(new_length)) + return 1; + + char *length_pos= (char*) packet->ptr() + packet_length; + char *to= length_pos + 1; + + to+= copy_and_convert(to, conv_length, to_cs, + (const char*) from, length, from_cs, &dummy_errors); + + net_store_length((uchar*) length_pos, to - length_pos - 1); + packet->length((uint) (to - packet->ptr())); + return 0; +} +#endif + + /** Send a error string to client. @@ -827,10 +885,10 @@ bool Protocol::store_string_aux(const char *from, size_t length, fromcs != &my_charset_bin && tocs != &my_charset_bin) { - uint dummy_errors; - return (convert->copy(from, length, fromcs, tocs, &dummy_errors) || - net_store_data((uchar*) convert->ptr(), convert->length())); + /* Store with conversion */ + return net_store_data((uchar*) from, length, fromcs, tocs); } + /* Store without conversion */ return net_store_data((uchar*) from, length); } diff --git a/sql/protocol.h b/sql/protocol.h index 1e584295028..3b6a3bdb863 100644 --- a/sql/protocol.h +++ b/sql/protocol.h @@ -43,6 +43,8 @@ protected: MYSQL_FIELD *next_mysql_field; MEM_ROOT *alloc; #endif + bool net_store_data(const uchar *from, size_t length, + CHARSET_INFO *fromcs, CHARSET_INFO *tocs); bool store_string_aux(const char *from, size_t length, CHARSET_INFO *fromcs, CHARSET_INFO *tocs); public: diff --git a/sql/sql_string.cc b/sql/sql_string.cc index 7c9793b273b..593450cacd5 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -794,10 +794,11 @@ String *copy_if_not_alloced(String *to,String *from,uint32 from_length) */ -uint32 -copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, - const char *from, uint32 from_length, CHARSET_INFO *from_cs, - uint *errors) +static uint32 +copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs, + const char *from, uint32 from_length, + CHARSET_INFO *from_cs, + uint *errors) { int cnvres; my_wc_t wc; @@ -849,6 +850,65 @@ outp: } +/* + Optimized for quick copying of ASCII characters in the range 0x00..0x7F. +*/ +uint32 +copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, + const char *from, uint32 from_length, CHARSET_INFO *from_cs, + uint *errors) +{ + /* + If any of the character sets is not ASCII compatible, + immediately switch to slow mb_wc->wc_mb method. + */ + if ((to_cs->state | from_cs->state) & MY_CS_NONASCII) + return copy_and_convert_extended(to, to_length, to_cs, + from, from_length, from_cs, errors); + + uint32 length= min(to_length, from_length), length2= length; + +#if defined(__i386__) + /* + Special loop for i386, it allows to refer to a + non-aligned memory block as UINT32, which makes + it possible to copy four bytes at once. This + gives about 10% performance improvement comparing + to byte-by-byte loop. + */ + for ( ; length >= 4; length-= 4, from+= 4, to+= 4) + { + if ((*(uint32*)from) & 0x80808080) + break; + *((uint32*) to)= *((const uint32*) from); + } +#endif + + for (; ; *to++= *from++, length--) + { + if (!length) + { + *errors= 0; + return length2; + } + if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */ + { + uint32 copied_length= length2 - length; + to_length-= copied_length; + from_length-= copied_length; + return copied_length + copy_and_convert_extended(to, to_length, + to_cs, + from, from_length, + from_cs, + errors); + } + } + + DBUG_ASSERT(FALSE); // Should never get to here + return 0; // Make compiler happy +} + + /** Copy string with HEX-encoding of "bad" characters. |