diff options
author | Michael Widenius <monty@askmonty.org> | 2009-07-02 13:15:33 +0300 |
---|---|---|
committer | Michael Widenius <monty@askmonty.org> | 2009-07-02 13:15:33 +0300 |
commit | 9db357e2bfebf9207a507c4a2244499899a960a2 (patch) | |
tree | ec4503d3142abaf8379c46752cb5a3adcd7c9471 /sql/sql_string.cc | |
parent | f1183fc160b504af3c976d7e1e95e19128b6ecbe (diff) | |
download | mariadb-git-9db357e2bfebf9207a507c4a2244499899a960a2.tar.gz |
Added MY_CS_NONASCII marker for character sets that are not compatible with latin1 for characters 0x00-0x7f
This allows us to skip and speed up some very common character converts that MySQL is doing when sending data to the client
and this gives us a nice speed increase for most queries that uses only characters in the range 0x00-0x7f.
This code is based on Alexander Barkov's code that he has done in MySQL 6.0
include/m_ctype.h:
Added MY_CS_NONASCII marker
libmysqld/lib_sql.cc:
Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments
mysys/charset.c:
Mark character sets with MY_CS_NONASCII
scripts/mysql_install_db.sh:
Fixed messages to refer to MariaDB instead of MySQL
sql/protocol.cc:
Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments
sql/protocol.h:
Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments
sql/sql_string.cc:
Quicker copy of strings with no characters above 0x7f
strings/conf_to_src.c:
Added printing of MY_CS_NONASCII
strings/ctype-extra.c:
Mark incompatible character sets with MY_CS_NONASCII
Removed duplicated character set geostd
strings/ctype-sjis.c:
Mark incompatible character sets with MY_CS_NONASCII
strings/ctype-uca.c:
Mark incompatible character sets with MY_CS_NONASCII
strings/ctype-ucs2.c:
Mark incompatible character sets with MY_CS_NONASCII
strings/ctype-utf8.c:
Mark incompatible character sets with MY_CS_NONASCII
strings/ctype.c:
Added function to check if character set is compatible with latin1 in ranges 0x00-0x7f
Diffstat (limited to 'sql/sql_string.cc')
-rw-r--r-- | sql/sql_string.cc | 68 |
1 files changed, 64 insertions, 4 deletions
diff --git a/sql/sql_string.cc b/sql/sql_string.cc index f0324da328f..8b4294c5caf 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -782,10 +782,11 @@ String *copy_if_not_alloced(String *to,String *from,uint32 from_length) */ -uint32 -copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, - const char *from, uint32 from_length, CHARSET_INFO *from_cs, - uint *errors) +static uint32 +copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs, + const char *from, uint32 from_length, + CHARSET_INFO *from_cs, + uint *errors) { int cnvres; my_wc_t wc; @@ -900,6 +901,65 @@ my_copy_with_hex_escaping(CHARSET_INFO *cs, } /* + Optimized for quick copying of ASCII characters in the range 0x00..0x7F. +*/ +uint32 +copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, + const char *from, uint32 from_length, CHARSET_INFO *from_cs, + uint *errors) +{ + /* + If any of the character sets is not ASCII compatible, + immediately switch to slow mb_wc->wc_mb method. + */ + if ((to_cs->state | from_cs->state) & MY_CS_NONASCII) + return copy_and_convert_extended(to, to_length, to_cs, + from, from_length, from_cs, errors); + + uint32 length= min(to_length, from_length), length2= length; + +#if defined(__i386__) + /* + Special loop for i386, it allows to refer to a + non-aligned memory block as UINT32, which makes + it possible to copy four bytes at once. This + gives about 10% performance improvement comparing + to byte-by-byte loop. + */ + for ( ; length >= 4; length-= 4, from+= 4, to+= 4) + { + if ((*(uint32*)from) & 0x80808080) + break; + *((uint32*) to)= *((const uint32*) from); + } +#endif + + for (; ; *to++= *from++, length--) + { + if (!length) + { + *errors= 0; + return length2; + } + if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */ + { + uint32 copied_length= length2 - length; + to_length-= copied_length; + from_length-= copied_length; + return copied_length + copy_and_convert_extended(to, to_length, + to_cs, + from, from_length, + from_cs, + errors); + } + } + + DBUG_ASSERT(FALSE); // Should never get to here + return 0; // Make compiler happy +} + + +/* copy a string, with optional character set conversion, with optional left padding (for binary -> UCS2 conversion) |