summaryrefslogtreecommitdiff
path: root/sql/sql_string.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/sql_string.cc')
-rw-r--r--sql/sql_string.cc68
1 files changed, 67 insertions, 1 deletions
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index 41725128415..edde1b8d8e0 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -1281,3 +1281,69 @@ uint convert_to_printable(char *to, size_t to_len,
*t= '\0';
return t - to;
}
+
+/**
+ Check if an input byte sequence is a valid character string of a given charset
+
+ @param cs The input character set.
+ @param str The input byte sequence to validate.
+ @param length A byte length of the str.
+ @param [out] valid_length A byte length of a valid prefix of the str.
+ @param [out] length_error True in the case of a character length error:
+ some byte[s] in the input is not a valid
+ prefix for a character, i.e. the byte length
+ of that invalid character is undefined.
+
+ @retval true if the whole input byte sequence is a valid character string.
+ The length_error output parameter is undefined.
+
+ @return
+ if the whole input byte sequence is a valid character string
+ then
+ return false
+ else
+ if the length of some character in the input is undefined (MY_CS_ILSEQ)
+ or the last character is truncated (MY_CS_TOOSMALL)
+ then
+ *length_error= true; // fatal error!
+ else
+ *length_error= false; // non-fatal error: there is no wide character
+ // encoding for some input character
+ return true
+*/
+bool validate_string(CHARSET_INFO *cs, const char *str, uint32 length,
+ size_t *valid_length, bool *length_error)
+{
+ if (cs->mbmaxlen > 1)
+ {
+ int well_formed_error;
+ *valid_length= cs->cset->well_formed_len(cs, str, str + length,
+ length, &well_formed_error);
+ *length_error= well_formed_error;
+ return well_formed_error;
+ }
+
+ /*
+ well_formed_len() is not functional on single-byte character sets,
+ so use mb_wc() instead:
+ */
+ *length_error= false;
+
+ const uchar *from= reinterpret_cast<const uchar *>(str);
+ const uchar *from_end= from + length;
+ my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
+
+ while (from < from_end)
+ {
+ my_wc_t wc;
+ int cnvres= (*mb_wc)(cs, &wc, (uchar*) from, from_end);
+ if (cnvres <= 0)
+ {
+ *valid_length= from - reinterpret_cast<const uchar *>(str);
+ return true;
+ }
+ from+= cnvres;
+ }
+ *valid_length= length;
+ return false;
+}