Added MY_CS_NONASCII marker for character sets that are not compatible with latin1 for characters 0x00-0x7f

This allows us to skip, and thereby speed up, some very common character set conversions that MySQL performs when sending data to the client, which gives us a nice speed increase for most queries that use only characters in the range 0x00-0x7f. This code is based on code that Alexander Barkov wrote for MySQL 6.0 include/m_ctype.h: Added MY_CS_NONASCII marker libmysqld/lib_sql.cc: Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments mysys/charset.c: Mark character sets with MY_CS_NONASCII scripts/mysql_install_db.sh: Fixed messages to refer to MariaDB instead of MySQL sql/protocol.cc: Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments sql/protocol.h: Added function net_store_data(...) that takes to and from CHARSET_INFO * as arguments sql/sql_string.cc: Quicker copy of strings with no characters above 0x7f strings/conf_to_src.c: Added printing of MY_CS_NONASCII strings/ctype-extra.c: Mark incompatible character sets with MY_CS_NONASCII Removed duplicated character set geostd strings/ctype-sjis.c: Mark incompatible character sets with MY_CS_NONASCII strings/ctype-uca.c: Mark incompatible character sets with MY_CS_NONASCII strings/ctype-ucs2.c: Mark incompatible character sets with MY_CS_NONASCII strings/ctype-utf8.c: Mark incompatible character sets with MY_CS_NONASCII strings/ctype.c: Added function to check if character set is compatible with latin1 in ranges 0x00-0x7f
author: Michael Widenius <monty@askmonty.org> 2009-07-02 13:15:33 +0300
committer: Michael Widenius <monty@askmonty.org> 2009-07-02 13:15:33 +0300
commit: 9db357e2bfebf9207a507c4a2244499899a960a2 (patch)
tree: ec4503d3142abaf8379c46752cb5a3adcd7c9471 /sql/protocol.cc
parent: f1183fc160b504af3c976d7e1e95e19128b6ecbe (diff)
download: mariadb-git-9db357e2bfebf9207a507c4a2244499899a960a2.tar.gz
1 files changed, 63 insertions, 4 deletions
diff --git a/sql/protocol.cc b/sql/protocol.cc
index a99259ffbcf..e61ad00b50f 100644
--- a/sql/protocol.cc
+++ b/sql/protocol.cc
@@ -58,6 +58,65 @@ bool Protocol_binary::net_store_data(const uchar *from, size_t length)
 }
 
 
+/*
+  net_store_data() - extended version with character set conversion.
+  
+  It is optimized for short strings whose length after
+  conversion is guaranteed to be less than 251, which occupies
+  exactly one byte to store length. This avoids using
+  the "convert" member as a temporary buffer; conversion
+  is done directly into the "packet" member.
+  The limit 251 is good enough to optimize send_fields()
+  because column, table, database names fit into this limit.
+*/
+
+#ifndef EMBEDDED_LIBRARY
+bool Protocol::net_store_data(const uchar *from, size_t length,
+                              CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
+{
+  uint dummy_errors;
+  /* Calculate maximum possible result length */
+  size_t conv_length= to_cs->mbmaxlen * length / from_cs->mbminlen;
+  ulong packet_length, new_length;
+  char *length_pos, *to;
+
+  if (conv_length > 250)
+  {
+    /*
+      For strings with conv_length greater than 250 bytes
+      we don't know how many bytes we will need to store length: one or more,
+      because we don't know result length until conversion is done.
+      For example, when converting from utf8 (mbmaxlen=3) to latin1,
+      conv_length=300 means that the result length can vary between 100 to 300.
+      length=100 needs one byte, length=300 needs more than one byte.
+      
+      Thus conversion directly to "packet" is not worthwhile.
+      Let's use "convert" as a temporary buffer.
+    */
+    return (convert->copy((const char*) from, length, from_cs, to_cs,
+                          &dummy_errors) ||
+            net_store_data((const uchar*) convert->ptr(), convert->length()));
+  }
+
+  packet_length= packet->length();
+  new_length= packet_length + conv_length + 1;  /* +1 for the length byte */
+
+  if (new_length > packet->alloced_length() && packet->realloc(new_length))
+    return 1;  /* could not grow "packet" */
+
+  length_pos= (char*) packet->ptr() + packet_length;  /* length byte goes here */
+  to= length_pos + 1;  /* converted data starts right after the length byte */
+
+  to+= copy_and_convert(to, conv_length, to_cs,
+                        (const char*) from, length, from_cs, &dummy_errors);
+
+  /* Result length is known only now; store it in the reserved byte */
+  net_store_length((uchar*) length_pos, to - length_pos - 1);
+  packet->length((uint) (to - packet->ptr()));
+  return 0;
+}
+#endif
+
+
+
+
 /**
   Send a error string to client.
 
@@ -773,10 +832,10 @@ bool Protocol::store_string_aux(const char *from, size_t length,
       fromcs != &my_charset_bin &&
       tocs != &my_charset_bin)
   {
-    uint dummy_errors;
-    return (convert->copy(from, length, fromcs, tocs, &dummy_errors) ||
-            net_store_data((uchar*) convert->ptr(), convert->length()));
+    /* Store with conversion */
+    return net_store_data((uchar*) from, length, fromcs, tocs);
   }
+  /* Store without conversion */
   return net_store_data((uchar*) from, length);
 }
 
@@ -802,7 +861,7 @@ bool Protocol_text::store(const char *from, size_t length,
 {
   CHARSET_INFO *tocs= this->thd->variables.character_set_results;
 #ifndef DBUG_OFF
-  DBUG_PRINT("info", ("Protocol_text::store field %u (%u): %*s", field_pos,
+  DBUG_PRINT("info", ("Protocol_text::store field %u (%u): %.*s", field_pos,
                       field_count, (int) length, from));
   DBUG_ASSERT(field_pos < field_count);
   DBUG_ASSERT(field_types == 0 ||
author	Michael Widenius <monty@askmonty.org>	2009-07-02 13:15:33 +0300
committer	Michael Widenius <monty@askmonty.org>	2009-07-02 13:15:33 +0300
commit	9db357e2bfebf9207a507c4a2244499899a960a2 (patch)
tree	ec4503d3142abaf8379c46752cb5a3adcd7c9471 /sql/protocol.cc
parent	f1183fc160b504af3c976d7e1e95e19128b6ecbe (diff)
download	mariadb-git-9db357e2bfebf9207a507c4a2244499899a960a2.tar.gz