summaryrefslogtreecommitdiff
path: root/sql/sql_string.cc
diff options
context:
space:
mode:
authorMichael Widenius <monty@askmonty.org>2009-07-02 13:15:33 +0300
committerMichael Widenius <monty@askmonty.org>2009-07-02 13:15:33 +0300
commit9db357e2bfebf9207a507c4a2244499899a960a2 (patch)
treeec4503d3142abaf8379c46752cb5a3adcd7c9471 /sql/sql_string.cc
parentf1183fc160b504af3c976d7e1e95e19128b6ecbe (diff)
downloadmariadb-git-9db357e2bfebf9207a507c4a2244499899a960a2.tar.gz
Added MY_CS_NONASCII marker for character sets that are not compatible with latin1 for characters 0x00-0x7f
This allows us to skip and speed up some very common character converts that MySQL is doing when sending data to the client and this gives us a nice speed increase for most queries that uses only characters in the range 0x00-0x7f. This code is based on Alexander Barkov's code that he has done in MySQL 6.0 include/m_ctype.h: Added MY_CS_NONASCII marker libmysqld/lib_sql.cc: Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments mysys/charset.c: Mark character sets with MY_CS_NONASCII scripts/mysql_install_db.sh: Fixed messages to refer to MariaDB instead of MySQL sql/protocol.cc: Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments sql/protocol.h: Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments sql/sql_string.cc: Quicker copy of strings with no characters above 0x7f strings/conf_to_src.c: Added printing of MY_CS_NONASCII strings/ctype-extra.c: Mark incompatible character sets with MY_CS_NONASCII Removed duplicated character set geostd strings/ctype-sjis.c: Mark incompatible character sets with MY_CS_NONASCII strings/ctype-uca.c: Mark incompatible character sets with MY_CS_NONASCII strings/ctype-ucs2.c: Mark incompatible character sets with MY_CS_NONASCII strings/ctype-utf8.c: Mark incompatible character sets with MY_CS_NONASCII strings/ctype.c: Added function to check if character set is compatible with latin1 in ranges 0x00-0x7f
Diffstat (limited to 'sql/sql_string.cc')
-rw-r--r--sql/sql_string.cc68
1 files changed, 64 insertions, 4 deletions
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index f0324da328f..8b4294c5caf 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -782,10 +782,11 @@ String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
*/
-uint32
-copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
- const char *from, uint32 from_length, CHARSET_INFO *from_cs,
- uint *errors)
+static uint32
+copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs,
+ const char *from, uint32 from_length,
+ CHARSET_INFO *from_cs,
+ uint *errors)
{
int cnvres;
my_wc_t wc;
@@ -900,6 +901,65 @@ my_copy_with_hex_escaping(CHARSET_INFO *cs,
}
/*
+ Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
+*/
+uint32
+copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
+ const char *from, uint32 from_length, CHARSET_INFO *from_cs,
+ uint *errors)
+{
+ /*
+ If any of the character sets is not ASCII compatible,
+ immediately switch to slow mb_wc->wc_mb method.
+ */
+ if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
+ return copy_and_convert_extended(to, to_length, to_cs,
+ from, from_length, from_cs, errors);
+
+ uint32 length= min(to_length, from_length), length2= length;
+
+#if defined(__i386__)
+ /*
+ Special loop for i386, it allows to refer to a
+ non-aligned memory block as UINT32, which makes
+ it possible to copy four bytes at once. This
+ gives about 10% performance improvement comparing
+ to byte-by-byte loop.
+ */
+ for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
+ {
+ if ((*(uint32*)from) & 0x80808080)
+ break;
+ *((uint32*) to)= *((const uint32*) from);
+ }
+#endif
+
+ for (; ; *to++= *from++, length--)
+ {
+ if (!length)
+ {
+ *errors= 0;
+ return length2;
+ }
+ if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
+ {
+ uint32 copied_length= length2 - length;
+ to_length-= copied_length;
+ from_length-= copied_length;
+ return copied_length + copy_and_convert_extended(to, to_length,
+ to_cs,
+ from, from_length,
+ from_cs,
+ errors);
+ }
+ }
+
+ DBUG_ASSERT(FALSE); // Should never get to here
+ return 0; // Make compiler happy
+}
+
+
+/*
copy a string,
with optional character set conversion,
with optional left padding (for binary -> UCS2 conversion)