diff options
author | Alexander Barkov <bar@mariadb.com> | 2019-12-03 17:21:25 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2019-12-05 09:45:28 +0400 |
commit | 2c7b6214e7653e5b51c691b2be34067af17956e6 (patch) | |
tree | 112a5aa59becc5d806a6b1d8db1410a9199e0337 | |
parent | 42a4ae54c2f05bccd2d6d752bbc23652962b6929 (diff) | |
download | mariadb-git-2c7b6214e7653e5b51c691b2be34067af17956e6.tar.gz |
A cleanup for MDEV-17088 Provide tools to encode/decode mysql-encoded file system names
- Load and convert the entire input file at once,
rather than reading string-by-string using fgets().
This change makes it possible to convert from UCS2, UTF16, UTF32 data.
- Adding the --delimiter option, to treat the specified
characters as delimiters rather than data to convert.
Useful in combination with `-f filename` or `-t filename`.
The delimiter characters are not converted;
they are copied from the input to the output as is.
- Adding diagnostics with the byte position in the input if:
* an illegal input byte sequence was found
* a character cannot be converted to the target character set
-rw-r--r-- | client/CMakeLists.txt | 3 | ||||
-rw-r--r-- | client/mariadb-conv.cc | 362 | ||||
-rw-r--r-- | mysql-test/std_data/mariadb-conv/file01.utf16.txt | bin | 0 -> 36 bytes | |||
-rw-r--r-- | mysql-test/std_data/mariadb-conv/file01.utf8.txt | 3 | ||||
-rw-r--r-- | mysql-test/std_data/mariadb-conv/file02.latin1.txt | 3 | ||||
-rw-r--r-- | mysql-test/suite/client/mariadb-conv-cp932.result | 2 | ||||
-rw-r--r-- | mysql-test/suite/client/mariadb-conv-cp932.test | 19 | ||||
-rw-r--r-- | mysql-test/suite/client/mariadb-conv-utf16.result | 13 | ||||
-rw-r--r-- | mysql-test/suite/client/mariadb-conv-utf16.test | 21 | ||||
-rw-r--r-- | mysql-test/suite/client/mariadb-conv-utf8.result | 2 | ||||
-rw-r--r-- | mysql-test/suite/client/mariadb-conv-utf8.test | 19 | ||||
-rw-r--r-- | mysql-test/suite/client/mariadb-conv.result | 12 | ||||
-rw-r--r-- | mysql-test/suite/client/mariadb-conv.test | 18 |
13 files changed, 411 insertions, 66 deletions
diff --git a/client/CMakeLists.txt b/client/CMakeLists.txt index 28a7db64250..164424a87ff 100644 --- a/client/CMakeLists.txt +++ b/client/CMakeLists.txt @@ -80,7 +80,8 @@ MYSQL_ADD_EXECUTABLE(mysqlslap mysqlslap.c) SET_SOURCE_FILES_PROPERTIES(mysqlslap.c PROPERTIES COMPILE_FLAGS "-DTHREADS") TARGET_LINK_LIBRARIES(mysqlslap ${CLIENT_LIB}) -MYSQL_ADD_EXECUTABLE(mariadb-conv mariadb-conv.cc) +MYSQL_ADD_EXECUTABLE(mariadb-conv mariadb-conv.cc + ${CMAKE_SOURCE_DIR}/sql/sql_string.cc) TARGET_LINK_LIBRARIES(mariadb-conv mysys strings) # "WIN32" also covers 64 bit. "echo" is used in some files below "mysql-test/". diff --git a/client/mariadb-conv.cc b/client/mariadb-conv.cc index a4517e2ad25..c9185d48bbf 100644 --- a/client/mariadb-conv.cc +++ b/client/mariadb-conv.cc @@ -22,6 +22,8 @@ #include "mariadb.h" #include "client_priv.h" +#include "sql_string.h" +#include "my_dir.h" #define CONV_VERSION "1.0" @@ -31,10 +33,12 @@ class CmdOpt public: const char *m_charset_from; const char *m_charset_to; + const char *m_delimiter; my_bool m_continue; CmdOpt() :m_charset_from("latin1"), m_charset_to("latin1"), + m_delimiter(NULL), m_continue(FALSE) { } static CHARSET_INFO *csinfo_by_name(const char *csname) @@ -61,9 +65,11 @@ static struct my_option long_options[] = &opt.m_charset_from, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"to", 't', "Specifies the encoding of the output.", &opt.m_charset_to, &opt.m_charset_to, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, - {"continue", 'c', "When this option is given, characters that cannot be " - "converted are silently discarded, instead of leading to a conversion error.", + {"continue", 'c', "Silently ignore conversion errors.", &opt.m_continue, &opt.m_continue, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"delimiter", 0, "Treat the specified characters as delimiters.", + &opt.m_delimiter, &opt.m_delimiter, 0, GET_STR, REQUIRED_ARG, + 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; @@ -76,69 +82,320 @@ 
get_one_option(const struct my_option *opt, } -class Conv +class File_buffer: public Binary_string { - CHARSET_INFO *m_tocs; - CHARSET_INFO *m_fromcs; - bool m_continue; public: - Conv(CHARSET_INFO *tocs, CHARSET_INFO *fromcs, bool opt_continue) - :m_tocs(tocs), m_fromcs(fromcs), m_continue(opt_continue) - { } - bool convert_file(FILE *infile) const; - bool convert_file_by_name(const char *filename) const; + bool load_binary_stream(FILE *file); + bool load_binary_file_by_name(const char *file); }; -bool Conv::convert_file(FILE *infile) const +/* + Load data from a binary stream whose length is not known in advance, + e.g. from stdin. +*/ +bool File_buffer::load_binary_stream(FILE *file) +{ + for ( ; ; ) + { + char buf[1024]; + if (length() + sizeof(buf) > UINT_MAX32 || reserve(sizeof(buf))) + { + fprintf(stderr, "Input data is too large\n"); + return true; + } + size_t nbytes= my_fread(file, (uchar *) end(), sizeof(buf), MYF(0)); + if (!nbytes || nbytes == (size_t) -1) + return false; + str_length+= (uint32) nbytes; + } + return false; +} + + +/* + Load data from a file by name. + The file size is know. 
+*/ +bool File_buffer::load_binary_file_by_name(const char *filename) { - char from[FN_REFLEN + 1], to[FN_REFLEN + 2]; + MY_STAT sbuf; + File fd; - while (fgets(from, sizeof(from), infile) != NULL) + if (!my_stat(filename, &sbuf, MYF(0))) + { + fprintf(stderr, "my_stat failed for '%s'\n", filename); + return true; + } + + if (!MY_S_ISREG(sbuf.st_mode)) { - uint errors; - size_t length= 0; - for (char *s= from; s < from + sizeof(from); s++) + fprintf(stderr, "'%s' is not a regular file\n", filename); + return true; + } + + if ((size_t) sbuf.st_size > UINT_MAX32) + { + fprintf(stderr, "File '%s' is too large\n", filename); + return true; + } + + if (alloc((uint32) sbuf.st_size)) + { + fprintf(stderr, "Failed to allocate read buffer\n"); + return true; + } + + if ((fd= my_open(filename, O_RDONLY, MYF(0))) == -1) + { + fprintf(stderr, "Could not open '%s'\n", filename); + return true; + } + + size_t nbytes= my_read(fd, (uchar*) Ptr, (size_t)sbuf.st_size, MYF(0)); + my_close(fd, MYF(0)); + length((uint32) nbytes); + + return false; +} + + +class Delimiter +{ +protected: + bool m_delimiter[127]; + bool m_has_delimiter_cached; + bool has_delimiter_slow() const + { + for (size_t i= 0; i < sizeof(m_delimiter); i++) { - if (*s == '\0' || *s == '\r' || *s == '\n') + if (m_delimiter[i]) + return true; + } + return false; + } + bool unescape(char *to, char from) const + { + switch (from) { + case '\\': *to= '\\'; return false; + case 'r': *to= '\r'; return false; + case 'n': *to= '\n'; return false; + case 't': *to= '\t'; return false; + case '0': *to= '\0'; return false; + } + *to= '\0'; + return true; + } + bool is_delimiter(char ch) const + { + return ch < 0 ? 
false : m_delimiter[(uint32) ch]; + } +public: + Delimiter() + :m_has_delimiter_cached(false) + { + bzero(&m_delimiter, sizeof(m_delimiter)); + } + bool has_delimiter() const + { + return m_has_delimiter_cached; + } + bool set_delimiter_unescape(const char *str) + { + m_has_delimiter_cached= false; + for ( ; *str; str++) + { + if (*str < 0) + return true; + if (*str == '\\') { - *s= '\0'; - length= s - from; - break; + char unescaped; + str++; + if (!*str || unescape(&unescaped, *str)) + return true; + m_delimiter[(uint) unescaped]= true; } + else + m_delimiter[(uint) *str]= true; } - - if (!length) + m_has_delimiter_cached= has_delimiter_slow(); + return false; + } + size_t get_delimiter_length(const char *str, const char *end) const + { + const char *str0= str; + for ( ; str < end; str++) + { + if (!is_delimiter(*str)) + break; + } + return str - str0; + } + size_t get_data_length(const char *str, const char *end) const + { + const char *str0= str; + for ( ; str < end; str++) { - puts(""); - continue; + if (is_delimiter(*str)) + break; } + return str - str0; + } +}; + - length= my_convert(to, (uint32) (sizeof(to) - 1), m_tocs, - from, (uint32) length, m_fromcs, - &errors); - to[length]= '\0'; - if (unlikely(!length || errors) && !m_continue) +class Conv_inbuf +{ + const char *m_ptr; + const char *m_end; +public: + Conv_inbuf(const char *from, size_t length) + :m_ptr(from), m_end(from + length) + { } + const char *ptr() const { return m_ptr; } + const char *end() const { return m_end; } + size_t length() const + { + return m_end - m_ptr; + } +private: + LEX_CSTRING get_prefix(size_t len) + { + LEX_CSTRING res; + res.str= ptr(); + res.length= len; + m_ptr+= len; + return res; + } + LEX_CSTRING get_empty_string() const + { + static LEX_CSTRING str= {NULL, 0}; + return str; + } +public: + LEX_CSTRING get_delimiter_chunk(const Delimiter &delimiter) + { + if (!delimiter.has_delimiter()) + return get_empty_string(); + size_t len= delimiter.get_delimiter_length(ptr(), 
end()); + return get_prefix(len); + } + LEX_CSTRING get_data_chunk(const Delimiter &delimiter) + { + if (!delimiter.has_delimiter()) + return get_prefix(length()); + size_t len= delimiter.get_data_length(ptr(), end()); + return get_prefix(len); + } +}; + + +class Conv_outbuf: public Binary_string +{ +public: + bool alloc(size_t out_max_length) + { + if (out_max_length >= UINT_MAX32) + { + fprintf(stderr, "The data needs a too large output buffer\n"); return true; - else - puts(to); + } + if (Binary_string::alloc((uint32) out_max_length)) + { + fprintf(stderr, "Failed to allocate the output buffer\n"); + return true; + } + return false; } +}; - return false; -} /* convert */ + +class Conv: public String_copier, public Delimiter +{ + CHARSET_INFO *m_tocs; + CHARSET_INFO *m_fromcs; + bool m_continue; +public: + Conv(CHARSET_INFO *tocs, CHARSET_INFO *fromcs, bool opt_continue) + :m_tocs(tocs), m_fromcs(fromcs), m_continue(opt_continue) + { } + size_t out_buffer_max_length(size_t from_length) const + { + return from_length / m_fromcs->mbminlen * m_tocs->mbmaxlen; + } + bool convert_data(const char *from, size_t length); + bool convert_binary_stream(FILE *file) + { + File_buffer buf; + return buf.load_binary_stream(file) || + convert_data(buf.ptr(), buf.length()); + } + bool convert_binary_file_by_name(const char *filename) + { + File_buffer buf; + return buf.load_binary_file_by_name(filename)|| + convert_data(buf.ptr(), buf.length()); + } +private: + void report_error(const char *from) const + { + if (well_formed_error_pos()) + { + fflush(stdout); + fprintf(stderr, + "Illegal %s byte sequence at position %d\n", + m_fromcs->csname, + (uint) (well_formed_error_pos() - from)); + } + else if (cannot_convert_error_pos()) + { + fflush(stdout); + fprintf(stderr, + "Conversion from %s to %s failed at position %d\n", + m_fromcs->csname, m_tocs->csname, + (uint) (cannot_convert_error_pos() - from)); + } + } + size_t write(const char *str, size_t length) const + { + return 
my_fwrite(stdout, (uchar *) str, length, MY_WME); + } +}; -bool Conv::convert_file_by_name(const char *filename) const +bool Conv::convert_data(const char *from, size_t from_length) { - FILE *fp; - if ((fp= fopen(filename, "r")) == NULL) + Conv_inbuf inbuf(from, from_length); + Conv_outbuf outbuf; + + if (outbuf.alloc(out_buffer_max_length(from_length))) + return true; + + for ( ; ; ) { - printf("can't open file %s", filename); - return 1; + LEX_CSTRING delim, data; + + delim= inbuf.get_delimiter_chunk(*this); + if (delim.length) + write(delim.str, delim.length); + + data= inbuf.get_data_chunk(*this); + if (!data.length) + break; + size_t length= well_formed_copy(m_tocs, + (char *) outbuf.ptr(), + outbuf.alloced_length(), + m_fromcs, data.str, data.length); + outbuf.length((uint32) length); + + if (most_important_error_pos() && !m_continue) + { + report_error(from); + return true; + } + write(outbuf.ptr(), outbuf.length()); } - bool rc= convert_file(fp); - fclose(fp); - return rc; + return false; } @@ -159,7 +416,7 @@ public: MYSQL_SERVER_VERSION, SYSTEM_TYPE, MACHINE_TYPE); puts("Character set conversion utility for MariaDB"); puts("Usage:"); - printf("%s [-f encoding] [-t encoding] [inputfile]...\n", my_progname); + printf("%s [OPTION...] 
[FILE...]\n", my_progname); my_print_help(long_options); } }; @@ -190,17 +447,32 @@ int main(int argc, char *argv[]) } Conv conv(charset_info_to, charset_info_from, opt.m_continue); + if (opt.m_delimiter) + { + if (charset_info_from->mbminlen > 1 || + charset_info_to->mbminlen > 1) + { + fprintf(stderr, "--delimiter cannot be used with %s to %s conversion\n", + charset_info_from->csname, charset_info_to->csname); + return 1; + } + if (conv.set_delimiter_unescape(opt.m_delimiter)) + { + fprintf(stderr, "Bad --delimiter value\n"); + return 1; + } + } if (argc == 0) { - if (conv.convert_file(stdin)) + if (conv.convert_binary_stream(stdin)) return 1; } else { for (int i= 0; i < argc; i++) { - if (conv.convert_file_by_name(argv[i])) + if (conv.convert_binary_file_by_name(argv[i])) return 1; } } diff --git a/mysql-test/std_data/mariadb-conv/file01.utf16.txt b/mysql-test/std_data/mariadb-conv/file01.utf16.txt Binary files differnew file mode 100644 index 00000000000..1ff100e48a1 --- /dev/null +++ b/mysql-test/std_data/mariadb-conv/file01.utf16.txt diff --git a/mysql-test/std_data/mariadb-conv/file01.utf8.txt b/mysql-test/std_data/mariadb-conv/file01.utf8.txt new file mode 100644 index 00000000000..cc065b71877 --- /dev/null +++ b/mysql-test/std_data/mariadb-conv/file01.utf8.txt @@ -0,0 +1,3 @@ +aaa +xxxÑÑÑxxx +bbb diff --git a/mysql-test/std_data/mariadb-conv/file02.latin1.txt b/mysql-test/std_data/mariadb-conv/file02.latin1.txt new file mode 100644 index 00000000000..28648e6f1e1 --- /dev/null +++ b/mysql-test/std_data/mariadb-conv/file02.latin1.txt @@ -0,0 +1,3 @@ +aaa +xxxßßßxxx +bbb diff --git a/mysql-test/suite/client/mariadb-conv-cp932.result b/mysql-test/suite/client/mariadb-conv-cp932.result index e18e36b81c9..efa428b9dde 100644 --- a/mysql-test/suite/client/mariadb-conv-cp932.result +++ b/mysql-test/suite/client/mariadb-conv-cp932.result @@ -21,6 +21,8 @@ BINARY CONVERT(a USING filename) ‘ªŽŽŽ‘—¿2 test/‘ªŽŽŽ‘—¿.frm test/‘ªŽŽŽ‘—¿2.frm +test/‘ªŽŽŽ‘—¿.frm 
+test/‘ªŽŽŽ‘—¿2.frm DROP TABLE t1; # bulk convert with file # --- Start of mariadb-conv for mysql-conv-test-cp932.txt --- diff --git a/mysql-test/suite/client/mariadb-conv-cp932.test b/mysql-test/suite/client/mariadb-conv-cp932.test index 40c9ba64044..1a9263b3f90 100644 --- a/mysql-test/suite/client/mariadb-conv-cp932.test +++ b/mysql-test/suite/client/mariadb-conv-cp932.test @@ -10,26 +10,27 @@ SET NAMES cp932; --let $MYSQLD_DATADIR= `select @@datadir` # simple I/O ---exec echo "‘ªŽŽŽ‘—¿" | $MARIADB_CONV -f cp932 -t filename ---exec echo "@6e2c@8a66@8cc7@6599@5eab" | $MARIADB_CONV -f filename -t cp932 +--exec echo "‘ªŽŽŽ‘—¿" | $MARIADB_CONV -f cp932 -t filename --delimiter="\r\n" +--exec echo "@6e2c@8a66@8cc7@6599@5eab" | $MARIADB_CONV -f filename -t cp932 --delimiter="\r\n" # undo query result --let $query_result=`SELECT CONVERT(CONVERT('‘ªŽŽŽ‘—¿' USING filename) USING binary);` --echo $query_result ---exec echo $query_result | $MARIADB_CONV -f filename -t cp932 +--exec echo $query_result | $MARIADB_CONV -f filename -t cp932 --delimiter="\r\n" --let $reverse_query_result=`SELECT CONVERT(_filename '@6e2c@8a66@8cc7@6599@5eab' USING cp932);` --echo $reverse_query_result ---exec echo $reverse_query_result | $MARIADB_CONV -f cp932 -t filename +--exec echo $reverse_query_result | $MARIADB_CONV -f cp932 -t filename --delimiter="\r\n" --echo # bulk convert with pipe CREATE TABLE t1 (id SERIAL, a VARCHAR(64) CHARACTER SET cp932); INSERT INTO t1 (a) VALUES ('‘ªŽŽŽ‘—¿'), ('‘ªŽŽŽ‘—¿2'); ---exec $MYSQL -Dtest --default-character-set=cp932 -e "SELECT a FROM t1 ORDER BY id" | $MARIADB_CONV -f cp932 -t filename +--exec $MYSQL -Dtest --default-character-set=cp932 -e "SELECT a FROM t1 ORDER BY id" | $MARIADB_CONV -f cp932 -t filename --delimiter="\r\n" --exec $MYSQL -Dtest --default-character-set=cp932 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id" ---exec $MYSQL -Dtest --default-character-set=cp932 --column-names=0 -e "SELECT BINARY CONVERT(a USING filename) 
FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t cp932 ---exec $MYSQL -Dtest --default-character-set=cp932 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $REPLACE "/" "@002f" "." "@002e"| $MARIADB_CONV -f filename -t cp932 +--exec $MYSQL -Dtest --default-character-set=cp932 --column-names=0 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t cp932 --delimiter="\r\n" +--exec $MYSQL -Dtest --default-character-set=cp932 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $REPLACE "/" "@002f" "." "@002e"| $MARIADB_CONV -f filename -t cp932 --delimiter="\r\n" +--exec $MYSQL -Dtest --default-character-set=cp932 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t cp932 --delimiter="/.\r\n" DROP TABLE t1; @@ -44,12 +45,12 @@ DROP TABLE t1; EOF --echo # --- Start of mariadb-conv for mysql-conv-test-cp932.txt --- ---exec $MARIADB_CONV -f cp932 -t filename $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt +--exec $MARIADB_CONV -f cp932 -t filename --delimiter="\r\n" $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt --echo # --- End of mariadb-conv for mysql-conv-test-cp932.txt --- --copy_file $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt $MYSQL_TMP_DIR/mysql-conv-test-cp932-2.txt --echo # --- Start of mariadb-conv for mysql-conv-test-cp932.txt and mysql-conv-test-cp932-2.txt --- ---exec $MARIADB_CONV -f cp932 -t filename $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt $MYSQL_TMP_DIR/mysql-conv-test-cp932-2.txt +--exec $MARIADB_CONV -f cp932 -t filename --delimiter="\r\n" $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt $MYSQL_TMP_DIR/mysql-conv-test-cp932-2.txt --echo # --- Start of mariadb-conv for mysql-conv-test-cp932.txt and mysql-conv-test-cp932-2.txt --- --remove_file $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt diff --git 
a/mysql-test/suite/client/mariadb-conv-utf16.result b/mysql-test/suite/client/mariadb-conv-utf16.result new file mode 100644 index 00000000000..76b98ef81ce --- /dev/null +++ b/mysql-test/suite/client/mariadb-conv-utf16.result @@ -0,0 +1,13 @@ +# +# MDEV-17088 Provide tools to encode/decode mysql-encoded file system names +# +SET NAMES utf8; +# Bad delimiter +--delimiter cannot be used with utf16 to utf8 conversion +# Bad delimiter +--delimiter cannot be used with utf8 to utf16 conversion +# Start of file01.utf16.txt +aaa +xxxÑÑÑxxx +bbb +# End of file01.utf16.txt diff --git a/mysql-test/suite/client/mariadb-conv-utf16.test b/mysql-test/suite/client/mariadb-conv-utf16.test new file mode 100644 index 00000000000..98f9e8d28f1 --- /dev/null +++ b/mysql-test/suite/client/mariadb-conv-utf16.test @@ -0,0 +1,21 @@ +-- source include/have_utf16.inc +-- source include/not_embedded.inc + +--echo # +--echo # MDEV-17088 Provide tools to encode/decode mysql-encoded file system names +--echo # + +--character_set utf8 +SET NAMES utf8; + +--echo # Bad delimiter +--error 1 +--exec $MARIADB_CONV -f utf16 -t utf8 --delimiter="\r\n" $MYSQL_TEST_DIR/std_data/mariadb-conv/file01.utf16.txt 2>&1 + +--echo # Bad delimiter +--error 1 +--exec $MARIADB_CONV -f utf8 -t utf16 --delimiter="\r\n" $MYSQL_TEST_DIR/std_data/mariadb-conv/file01.utf8.txt 2>&1 + +--echo # Start of file01.utf16.txt +--exec $MARIADB_CONV -f utf16 -t utf8 $MYSQL_TEST_DIR/std_data/mariadb-conv/file01.utf16.txt 2>&1 +--echo # End of file01.utf16.txt diff --git a/mysql-test/suite/client/mariadb-conv-utf8.result b/mysql-test/suite/client/mariadb-conv-utf8.result index b38cb118c72..cae8268d523 100644 --- a/mysql-test/suite/client/mariadb-conv-utf8.result +++ b/mysql-test/suite/client/mariadb-conv-utf8.result @@ -21,6 +21,8 @@ BINARY CONVERT(a USING filename) 測試資料2 test/測試資料.frm test/測試資料2.frm +test/測試資料.frm +test/測試資料2.frm DROP TABLE t1; # bulk convert with file # --- Start of mariadb-conv for mysql-conv-test-utf8.txt --- diff 
--git a/mysql-test/suite/client/mariadb-conv-utf8.test b/mysql-test/suite/client/mariadb-conv-utf8.test index 2d5bfb4efe9..5002aadd6e9 100644 --- a/mysql-test/suite/client/mariadb-conv-utf8.test +++ b/mysql-test/suite/client/mariadb-conv-utf8.test @@ -10,26 +10,27 @@ SET NAMES utf8; --let $MYSQLD_DATADIR= `select @@datadir` # simple I/O ---exec echo "測試資料" | $MARIADB_CONV -f utf8 -t filename ---exec echo "@6e2c@8a66@8cc7@6599@5eab" | $MARIADB_CONV -f filename -t utf8 +--exec echo "測試資料" | $MARIADB_CONV -f utf8 -t filename --delimiter="\r\n" +--exec echo "@6e2c@8a66@8cc7@6599@5eab" | $MARIADB_CONV -f filename -t utf8 --delimiter="\r\n" # undo query result --let $query_result=`SELECT CONVERT(CONVERT('測試資料' USING filename) USING binary);` --echo $query_result ---exec echo $query_result | $MARIADB_CONV -f filename -t utf8 +--exec echo $query_result | $MARIADB_CONV -f filename -t utf8 --delimiter="\r\n" --let $reverse_query_result=`SELECT CONVERT(_filename '@6e2c@8a66@8cc7@6599@5eab' USING utf8);` --echo $reverse_query_result ---exec echo $reverse_query_result | $MARIADB_CONV -f utf8 -t filename +--exec echo $reverse_query_result | $MARIADB_CONV -f utf8 -t filename --delimiter="\r\n" --echo # bulk convert with pipe CREATE TABLE t1 (id SERIAL, a VARCHAR(64) CHARACTER SET utf8); INSERT INTO t1 (a) VALUES ('測試資料'), ('測試資料2'); ---exec $MYSQL -Dtest --default-character-set=utf8 -e "SELECT a FROM t1 ORDER BY id" | $MARIADB_CONV -f utf8 -t filename +--exec $MYSQL -Dtest --default-character-set=utf8 -e "SELECT a FROM t1 ORDER BY id" | $MARIADB_CONV -f utf8 -t filename --delimiter="\r\n" --exec $MYSQL -Dtest --default-character-set=utf8 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id" ---exec $MYSQL -Dtest --default-character-set=utf8 --column-names=0 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t utf8 ---exec $MYSQL -Dtest --default-character-set=utf8 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a 
USING filename),'.frm') FROM t1 ORDER BY id" | $REPLACE "/" "@002f" "." "@002e"| $MARIADB_CONV -f filename -t utf8 +--exec $MYSQL -Dtest --default-character-set=utf8 --column-names=0 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t utf8 --delimiter="\r\n" +--exec $MYSQL -Dtest --default-character-set=utf8 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $REPLACE "/" "@002f" "." "@002e"| $MARIADB_CONV -f filename -t utf8 --delimiter="\r\n" +--exec $MYSQL -Dtest --default-character-set=utf8 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t utf8 --delimiter="/.\r\n" DROP TABLE t1; @@ -44,12 +45,12 @@ DROP TABLE t1; EOF --echo # --- Start of mariadb-conv for mysql-conv-test-utf8.txt --- ---exec $MARIADB_CONV -f utf8 -t filename $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt +--exec $MARIADB_CONV -f utf8 -t filename --delimiter="\r\n" $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt --echo # --- End of mariadb-conv for mysql-conv-test-utf8.txt --- --copy_file $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt $MYSQL_TMP_DIR/mysql-conv-test-utf8-2.txt --echo # --- Start of mariadb-conv for mysql-conv-test-utf8.txt and mysql-conv-test-utf8-2.txt --- ---exec $MARIADB_CONV -f utf8 -t filename $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt $MYSQL_TMP_DIR/mysql-conv-test-utf8-2.txt +--exec $MARIADB_CONV -f utf8 -t filename --delimiter="\r\n" $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt $MYSQL_TMP_DIR/mysql-conv-test-utf8-2.txt --echo # --- Start of mariadb-conv for mysql-conv-test-utf8.txt and mysql-conv-test-utf8-2.txt --- --remove_file $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt diff --git a/mysql-test/suite/client/mariadb-conv.result b/mysql-test/suite/client/mariadb-conv.result index 1677a13de1d..432be04daae 100644 --- a/mysql-test/suite/client/mariadb-conv.result +++ b/mysql-test/suite/client/mariadb-conv.result @@ -11,3 
+11,15 @@ mariadb-conv: unknown option '-r' Character set unknown-cs is not supported # unknown "from" character set Character set unknown-cs is not supported +# Bad delimiter +Bad --delimiter value +# Conversion error +Conversion from utf8 to latin1 failed at position 7 +aaa +xxx???xxx +bbb +# Bad input character +Illegal utf8 byte sequence at position 7 +aaa +xxx???xxx +bbb diff --git a/mysql-test/suite/client/mariadb-conv.test b/mysql-test/suite/client/mariadb-conv.test index 2046b876b69..2be758b6fcf 100644 --- a/mysql-test/suite/client/mariadb-conv.test +++ b/mysql-test/suite/client/mariadb-conv.test @@ -6,8 +6,8 @@ --echo # default encoding --exec echo "t1" | $MARIADB_CONV ---exec echo "t1" | $MARIADB_CONV -f filename ---exec echo "t1" | $MARIADB_CONV -t filename +--exec echo "t1" | $MARIADB_CONV -f filename --delimiter="\r\n" +--exec echo "t1" | $MARIADB_CONV -t filename --delimiter="\r\n" --echo # invalid option --replace_regex /.*mariadb-conv.*: unknown/mariadb-conv: unknown/ @@ -23,3 +23,17 @@ --replace_regex /.*mariadb-conv.*: unknown/mariadb-conv: unknown/ --error 1 --exec echo "t1" | $MARIADB_CONV -f unknown-cs -t latin1 2>&1 > /dev/null + +--echo # Bad delimiter +--error 1 +--exec echo "t1" | $MARIADB_CONV --delimiter="\x" 2>&1 > /dev/null + +--echo # Conversion error +--error 1 +--exec $MARIADB_CONV -f utf8 -t latin1 < $MYSQL_TEST_DIR/std_data/mariadb-conv/file01.utf8.txt 2>&1 +--exec $MARIADB_CONV -f utf8 -t latin1 -c < $MYSQL_TEST_DIR/std_data/mariadb-conv/file01.utf8.txt 2>&1 + +--echo # Bad input character +--error 1 +--exec $MARIADB_CONV -f utf8 -t latin1 < $MYSQL_TEST_DIR/std_data/mariadb-conv/file02.latin1.txt 2>&1 +--exec $MARIADB_CONV -f utf8 -t latin1 -c < $MYSQL_TEST_DIR/std_data/mariadb-conv/file02.latin1.txt 2>&1 |