summaryrefslogtreecommitdiff
path: root/sql/strfunc.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/strfunc.cc')
-rw-r--r--sql/strfunc.cc76
1 files changed, 76 insertions, 0 deletions
diff --git a/sql/strfunc.cc b/sql/strfunc.cc
index c822d10af46..4eb20faa97c 100644
--- a/sql/strfunc.cc
+++ b/sql/strfunc.cc
@@ -235,3 +235,79 @@ uint check_word(TYPELIB *lib, const char *val, const char *end,
*end_of_word= ptr;
return res;
}
+
+
+/*
+ Converts a string between character sets
+
+ SYNOPSIS
+ strconvert()
+ from_cs source character set
+ from source, a null terminated string
+ to destination buffer
+ to_length destination buffer length
+
+ NOTES
+ 'to' is always terminated with a '\0' character.
+ If there is no enough space to convert whole string,
+ only prefix is converted, and terminated with '\0'.
+
+ RETURN VALUES
+ result string length
+*/
+
+
+uint strconvert(CHARSET_INFO *from_cs, const char *from,
+ CHARSET_INFO *to_cs, char *to, uint to_length)
+{
+ int cnvres;
+ my_wc_t wc;
+ char *to_start= to;
+ uchar *to_end= (uchar*) to + to_length - 1;
+ int (*mb_wc)(struct charset_info_st *, my_wc_t *, const uchar *,
+ const uchar *)= from_cs->cset->mb_wc;
+ int (*wc_mb)(struct charset_info_st *, my_wc_t, uchar *s, uchar *e)=
+ to_cs->cset->wc_mb;
+ uint error_count= 0;
+
+ while (1)
+ {
+ /*
+ Using 'from + 10' is safe:
+ - it is enough to scan a single character in any character set.
+ - if remaining string is shorter than 10, then mb_wc will return
+ with error because of unexpected '\0' character.
+ */
+ if ((cnvres= (*mb_wc)(from_cs, &wc,
+ (uchar*) from, (uchar*) from + 10)) > 0)
+ {
+ if (!wc)
+ break;
+ from+= cnvres;
+ }
+ else if (cnvres == MY_CS_ILSEQ)
+ {
+ error_count++;
+ from++;
+ wc= '?';
+ }
+ else
+ break; // Impossible char.
+
+outp:
+
+ if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
+ to+= cnvres;
+ else if (cnvres == MY_CS_ILUNI && wc != '?')
+ {
+ error_count++;
+ wc= '?';
+ goto outp;
+ }
+ else
+ break;
+ }
+ *to= '\0';
+ return (uint32) (to - to_start);
+
+}