Associate a charset directly with its number in the Index file, and

propagate those changes through the code. This is so that there can be holes in the list of charsets without breaking old tables. configure.in: - changed pattern for getting number from charsets Index file mysys/charset.c: - changed from using a TYPELIB to a CS_ID struct, so both the name and the number of a charset are stored in available_charsets sql/share/charsets/Index: - made the number a real part of the Index file, not just a comment sql/share/charsets/README: - order is no longer significant, but each charset must be paired with its number
author: unknown <tim@localhost.polyesthetic.msg> 2000-08-22 16:08:34 -0400
committer: unknown <tim@localhost.polyesthetic.msg> 2000-08-22 16:08:34 -0400
commit: db3b3c1799d111a22c5aaead6426b7f064ea8fd9 (patch)
tree: 0e7c7503846eda7c08f4344ed455ae2de66a3598
parent: 49b83f3810d20a73d74d0ccaacc22d64a6413569 (diff)
download: mariadb-git-db3b3c1799d111a22c5aaead6426b7f064ea8fd9.tar.gz
4 files changed, 97 insertions, 83 deletions
diff --git a/configure.in b/configure.in
index f200fefca33..bb3ff9920e6 100644
--- a/configure.in
+++ b/configure.in
@@ -1577,15 +1577,6 @@ do
     See the Installation chapter in the Reference Manual.]);
   fi
 done
-
-default_charset_has_source=0
-for cs in $COMPILED_CHARSETS
-do
-  if test $cs = $default_charset
-  then
-    default_charset_has_source=1
-  fi
-done
   
 CHARSET_SRCS=""
 CHARSETS_NEED_SOURCE=""
@@ -1600,8 +1591,10 @@ index_file="$srcdir/sql/share/charsets/Index"
 for c in $CHARSETS
 do
   # get the charset number from $index_file
-  subpat='^'"${c}"'[[\t ]]*#'
-  number=`$AWK 'sub("'"$subpat"'", "") { print }' $index_file`
+changequote(,)dnl
+  subpat='^'"${c}"'[ 	][ 	]*\([0-9][0-9]*\)[^0-9]*$'
+  number=`sed -e "/$subpat/!d" -e 's//\1/' $index_file`
+changequote([,])dnl
   # some sanity checking....
   if test X"$number" = X
   then
diff --git a/mysys/charset.c b/mysys/charset.c
index bf51184589c..0cc85058eeb 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -21,9 +21,14 @@
 #include <m_string.h>
 #include <my_dir.h>
 
+typedef struct cs_id_st {
+  char *name;
+  uint number;
+} CS_ID;
+
 const char *charsets_dir = NULL;
 static DYNAMIC_ARRAY cs_info_table;
-static TYPELIB available_charsets;
+static CS_ID *available_charsets;
 static int charset_initialized=0;
 
 #define MAX_LINE  1024
@@ -46,6 +51,24 @@ uint compiled_charset_number(const char *name);
 const char *compiled_charset_name(uint charset_number);
 
 
+static uint num_from_csname(CS_ID *cs, const char *name)
+{
+  CS_ID *c;
+  for (c = cs; c; ++c)
+    if (!strcmp(c->name, name))
+      return c->number;
+  return 0;   /* this mimics find_type() */
+}
+
+static char *name_from_csnum(CS_ID *cs, uint number)
+{
+  CS_ID *c;
+  for (c = cs; c; ++c)
+    if (c->number == number)
+      return c->name;
+  return "?";   /* this mimics find_type() */
+}
+
 static my_bool get_word(struct simpleconfig_buf_st *fb, char *buf)
 {
   char *endptr=fb->p;
@@ -92,12 +115,12 @@ static char *get_charsets_dir(char *buf)
 }
 
 
-static my_bool read_charset_index(TYPELIB *charsets, myf myflags)
+static my_bool read_charset_index(CS_ID **charsets, myf myflags)
 {
   struct simpleconfig_buf_st fb;
-  char buf[MAX_LINE];
+  char buf[MAX_LINE], num_buf[MAX_LINE];
   DYNAMIC_ARRAY cs;
-  my_string s;
+  CS_ID *csid;
 
   strmov(get_charsets_dir(buf), "Index");
 
@@ -106,36 +129,42 @@ static my_bool read_charset_index(TYPELIB *charsets, myf myflags)
   fb.buf[0] = '\0';
   fb.p = fb.buf;
 
-  if (init_dynamic_array(&cs, sizeof(my_string), 32, 32))
+  if (init_dynamic_array(&cs, sizeof(CS_ID *), 32, 32))
     return TRUE;
 
-  while (!get_word(&fb, buf))
+  while (!get_word(&fb, buf) && !get_word(&fb, num_buf))
   {
+    uint csnum;
     uint length;
-    if (!(s= (char*) my_once_alloc(length= (uint) strlen(buf)+1, myflags)))
+
+    if (!(csnum = atoi(num_buf)))
     {
+      /* corrupt Index file */
       my_fclose(fb.f,myflags);
       return TRUE;
     }
-    memcpy(s,buf,length);
-    insert_dynamic(&cs, (gptr) &s);
+
+    if (!(csid = (CS_ID*) my_once_alloc(sizeof(CS_ID), myflags)) ||
+        !(csid->name=
+           (char*) my_once_alloc(length= (uint) strlen(buf)+1, myflags)))
+    {
+      my_fclose(fb.f,myflags);
+      return TRUE;
+    }
+    memcpy(csid->name,buf,length);
+    csid->number = csnum;
+
+    insert_dynamic(&cs, (gptr) &csid);
   }
   my_fclose(fb.f,myflags);
 
-  /* I seriously doubt this is the best way to initialize this
-   * TYPELIB from the Index file.  But it's the best way I could
-   * come up with right now. */
 
-  charsets->count = cs.elements;
-  charsets->name  = "";
-  if (!(charsets->type_names =
-	(const char **) my_once_alloc((cs.elements + 1) * sizeof(const char *),
-				      myflags)))
+  if (!(*charsets =
+      (CS_ID *) my_once_alloc((cs.elements + 1) * sizeof(CS_ID *), myflags)))
     return TRUE;
   /* unwarranted chumminess with dynamic_array implementation? */
-  memcpy((char*) charsets->type_names, cs.buffer,
-	 cs.elements * sizeof(my_string *));
-  charsets->type_names[cs.elements] = NullS;
+  memcpy((byte *) *charsets, cs.buffer, cs.elements * sizeof(CS_ID *));
+  (*charsets)[cs.elements] = NULL;
   delete_dynamic(&cs);  
 
   return FALSE;
@@ -164,7 +193,7 @@ static my_bool init_available_charsets(myf myflags)
     charset_initialized=1;
     pthread_mutex_unlock(&THR_LOCK_charset);
   }
-  return error || available_charsets.count == 0;
+  return error || !available_charsets[0];
 }
 
 
@@ -193,7 +222,7 @@ static my_bool fill_array(uchar *array, int sz, struct simpleconfig_buf_st *fb)
 static void get_charset_conf_name(uint cs_number, char *buf)
 {
   strxmov(get_charsets_dir(buf),
-          get_type(&available_charsets, cs_number - 1), ".conf", NullS);
+          name_from_csnum(&available_charsets, cs_number), ".conf", NullS);
 }
 
 
@@ -237,7 +266,7 @@ uint get_charset_number(const char *charset_name)
   if (error)
     return compiled_charset_number(charset_name);
   else
-    return find_type((char*)charset_name, &available_charsets, 1);
+    return num_from_csname((char*)charset_name, &available_charsets, 1);
 }
 
 const char *get_charset_name(uint charset_number)
@@ -247,7 +276,7 @@ const char *get_charset_name(uint charset_number)
   if (error)
     return compiled_charset_name(charset_number);
   else
-    return get_type(&available_charsets, charset_number - 1);
+    return name_from_csnum(&available_charsets, charset_number);
 }
 
 
@@ -452,29 +481,27 @@ char * list_charsets(myf want_flags)
 
   if (want_flags & MY_CONFIG_SETS)
   {
-    uint i;
-    const char *cs_name;
+    CS_ID *c;
     char buf[FN_REFLEN];
     MY_STAT stat;
 
-    for (i = 0; i < available_charsets.count; i++)
+    for (c = available_charsets; *c; ++c)
     {
-      cs_name = get_type(&available_charsets, i);
-      if (charset_in_string(cs_name, &s))
+      if (charset_in_string(c->name, &s))
         continue;
-      get_charset_conf_name(i + 1, buf);
+      get_charset_conf_name(c->number, buf);
       if (!my_stat(buf, &stat, MYF(0)))
         continue;       /* conf file doesn't exist */
-      dynstr_append(&s, cs_name);
+      dynstr_append(&s, c->name);
       dynstr_append(&s, " ");
     }
   }
 
   if (want_flags & MY_INDEX_SETS)
   {
-    uint i;
-    for (i = 0; i < available_charsets.count; i++)
-      charset_append(&s, get_type(&available_charsets, i));
+    CS_ID *c;
+    for (c = available_charsets; *c; ++c)
+      charset_append(&s, c->name);
   }
 
   if (want_flags & MY_LOADED_SETS)
diff --git a/sql/share/charsets/Index b/sql/share/charsets/Index
index 8d5e7576d56..fd139db46e9 100644
--- a/sql/share/charsets/Index
+++ b/sql/share/charsets/Index
@@ -2,36 +2,33 @@
 #
 # This file lists all of the available character sets.
 
-# THE ORDER IN WHICH CHARACTER SETS ARE LISTED IS IMPORTANT.  See the
-# README file in this directory for details.
 
-
-big5 		  #  1
-czech 		  #  2
-dec8 		  #  3
-dos 		  #  4
-german1 	  #  5
-hp8 		  #  6
-koi8_ru 	  #  7
-latin1		  #  8
-latin2		  #  9
-swe7 		  # 10
-usa7 		  # 11
-ujis 		  # 12
-sjis 		  # 13
-cp1251		  # 14
-danish		  # 15
-hebrew		  # 16
-win1251 	  # 17
-tis620		  # 18
-euc_kr		  # 19
-estonia 	  # 20
-hungarian 	  # 21
-koi8_ukr 	  # 22
-win1251ukr 	  # 23
-gb2312		  # 24
-greek 		  # 25
-win1250 	  # 26
-croat 		  # 27
-gbk 		  # 28
-cp1257		  # 29
+big5 		   1
+czech 		   2
+dec8 		   3
+dos 		   4
+german1 	   5
+hp8 		   6
+koi8_ru 	   7
+latin1		   8
+latin2		   9
+swe7 		  10
+usa7 		  11
+ujis 		  12
+sjis 		  13
+cp1251		  14
+danish		  15
+hebrew		  16
+win1251 	  17
+tis620		  18
+euc_kr		  19
+estonia 	  20
+hungarian 	  21
+koi8_ukr 	  22
+win1251ukr 	  23
+gb2312		  24
+greek 		  25
+win1250 	  26
+croat 		  27
+gbk 		  28
+cp1257		  29
diff --git a/sql/share/charsets/README b/sql/share/charsets/README
index 80da6ba9665..172d1ee8e1e 100644
--- a/sql/share/charsets/README
+++ b/sql/share/charsets/README
@@ -9,10 +9,9 @@ different character sets.  It contains:
 Index
     The Index file lists all of the available charset configurations.
 
-    THE ORDER OF THE CHARACTER SETS IN THIS FILE IS SIGNIFICANT.
-    The first character set is number 1, the second is number 2, etc.  The
-    number is stored IN THE DATABASE TABLE FILES and must not be changed.
-    Always add new character sets to the end of the list, so that the
+    Each charset is paired with a number.  The number is stored
+    IN THE DATABASE TABLE FILES and must not be changed.  Always
+    add new character sets to the end of the list, so that the
     numbers of the other character sets will not be changed.
 
 Compiled in or configuration file?
@@ -39,5 +38,3 @@ Syntax of configuration files
     number in hexadecimal format.  The ctype array takes up the first
     257 words; the to_lower, to_upper and sort_order arrays take up 256
     words each after that.
-
-    The Index file is simply a list of the available character sets.
author	unknown <tim@localhost.polyesthetic.msg>	2000-08-22 16:08:34 -0400
committer	unknown <tim@localhost.polyesthetic.msg>	2000-08-22 16:08:34 -0400
commit	db3b3c1799d111a22c5aaead6426b7f064ea8fd9 (patch)
tree	0e7c7503846eda7c08f4344ed455ae2de66a3598
parent	49b83f3810d20a73d74d0ccaacc22d64a6413569 (diff)
download	mariadb-git-db3b3c1799d111a22c5aaead6426b7f064ea8fd9.tar.gz