Added MY_CS_NONASCII marker for character sets that are not compatible with latin1 for characters 0x00-0x7f

This allows us to skip, and thereby speed up, some very common character set conversions that MySQL performs when sending data to the client. This gives a nice speed increase for most queries that use only characters in the range 0x00-0x7f. This code is based on code that Alexander Barkov wrote for MySQL 6.0.

include/m_ctype.h: Added MY_CS_NONASCII marker
libmysqld/lib_sql.cc: Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments
mysys/charset.c: Mark character sets with MY_CS_NONASCII
scripts/mysql_install_db.sh: Fixed messages to refer to MariaDB instead of MySQL
sql/protocol.cc: Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments
sql/protocol.h: Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments
sql/sql_string.cc: Quicker copy of strings with no characters above 0x7f
strings/conf_to_src.c: Added printing of MY_CS_NONASCII
strings/ctype-extra.c: Mark incompatible character sets with MY_CS_NONASCII. Removed duplicated character set geostd
strings/ctype-sjis.c: Mark incompatible character sets with MY_CS_NONASCII
strings/ctype-uca.c: Mark incompatible character sets with MY_CS_NONASCII
strings/ctype-ucs2.c: Mark incompatible character sets with MY_CS_NONASCII
strings/ctype-utf8.c: Mark incompatible character sets with MY_CS_NONASCII
strings/ctype.c: Added function to check if character set is compatible with latin1 in ranges 0x00-0x7f
author: Michael Widenius <monty@askmonty.org> 2009-07-02 13:15:33 +0300
committer: Michael Widenius <monty@askmonty.org> 2009-07-02 13:15:33 +0300
commit: 9db357e2bfebf9207a507c4a2244499899a960a2 (patch)
tree: ec4503d3142abaf8379c46752cb5a3adcd7c9471 /sql/sql_string.cc
parent: f1183fc160b504af3c976d7e1e95e19128b6ecbe (diff)
download: mariadb-git-9db357e2bfebf9207a507c4a2244499899a960a2.tar.gz
1 files changed, 64 insertions, 4 deletions
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index f0324da328f..8b4294c5caf 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -782,10 +782,11 @@ String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
 */
 
 
-uint32
-copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
-                 const char *from, uint32 from_length, CHARSET_INFO *from_cs,
-                 uint *errors)
+static uint32
+copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
+                          const char *from, uint32 from_length,
+                          CHARSET_INFO *from_cs,
+                          uint *errors)
 {
   int         cnvres;
   my_wc_t     wc;
@@ -900,6 +901,65 @@ my_copy_with_hex_escaping(CHARSET_INFO *cs,
 }
 
 /*
+  Charset-converting copy with a fast path: bytes 0x00..0x7F are copied as-is,
+  and copy_and_convert_extended() takes over from the first byte above 0x7F. */
+uint32
+copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
+                 const char *from, uint32 from_length, CHARSET_INFO *from_cs,
+                 uint *errors)
+{
+  /*
+    If either character set is flagged MY_CS_NONASCII (not ASCII compatible),
+    immediately switch to the slow mb_wc->wc_mb method for the whole string.
+  */
+  if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
+    return copy_and_convert_extended(to, to_length, to_cs,
+                                     from, from_length, from_cs, errors);
+
+  uint32 length= min(to_length, from_length), length2= length; /* length counts down; length2 keeps the initial value */
+
+#if defined(__i386__)
+  /*
+    Special loop for i386: that platform allows a
+    non-aligned memory block to be accessed as UINT32,
+    so four bytes can be tested and copied at once.
+    This gives about a 10% performance improvement
+    compared to the byte-by-byte loop.
+  */
+  for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
+  {
+    if ((*(uint32*)from) & 0x80808080) /* at least one of the four bytes is above 0x7F */
+      break; /* leave it to the byte loop below to find which one */
+    *((uint32*) to)= *((const uint32*) from);
+  }
+#endif
+
+  for (; ; *to++= *from++, length--) /* one byte copied per iteration; exits only via return */
+  {
+    if (!length) /* all min(to_length, from_length) bytes were <= 0x7F */
+    {
+      *errors= 0;
+      return length2;
+    }
+    if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
+    {
+      uint32 copied_length= length2 - length; /* bytes already copied verbatim */
+      to_length-= copied_length;
+      from_length-= copied_length;
+      return copied_length + copy_and_convert_extended(to, to_length,
+                                                       to_cs,
+                                                       from, from_length,
+                                                       from_cs,
+                                                       errors);
+    }
+  }
+
+  DBUG_ASSERT(FALSE); // Should never get to here
+  return 0;           // Make compiler happy
+}
+
+
+/*
   copy a string,
   with optional character set conversion,
   with optional left padding (for binary -> UCS2 conversion)
author	Michael Widenius <monty@askmonty.org>	2009-07-02 13:15:33 +0300
committer	Michael Widenius <monty@askmonty.org>	2009-07-02 13:15:33 +0300
commit	9db357e2bfebf9207a507c4a2244499899a960a2 (patch)
tree	ec4503d3142abaf8379c46752cb5a3adcd7c9471 /sql/sql_string.cc
parent	f1183fc160b504af3c976d7e1e95e19128b6ecbe (diff)
download	mariadb-git-9db357e2bfebf9207a507c4a2244499899a960a2.tar.gz