diff options
Diffstat (limited to 'sql/sql_load.cc')
-rw-r--r-- | sql/sql_load.cc | 51 |
1 files changed, 50 insertions, 1 deletions
diff --git a/sql/sql_load.cc b/sql/sql_load.cc index 509df96e89d..c25e73e7346 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -616,7 +616,8 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, *enclosed, skip_lines, ignore); thd_proc_info(thd, "End bulk insert"); - thd_progress_next_stage(thd); + if (!error) + thd_progress_next_stage(thd); if (thd->locked_tables_mode <= LTM_LOCK_TABLES && table->file->ha_end_bulk_insert() && !error) { @@ -1525,6 +1526,54 @@ inline bool READ_INFO::terminator(const uchar *ptr, uint length) } +/** + Read a field. + + The data in the loaded file was presumably escaped using + - either select_export::send_data() OUTFILE + - or mysql_real_escape_string() + using the same character set as the one specified in the current + "LOAD DATA INFILE ... CHARACTER SET ..." (or the default LOAD character set). + + Note, non-escaped multi-byte characters are scanned as a single entity. + This is needed to correctly distinguish between: + - 0x5C as an escape character versus + - 0x5C as the second byte in a multi-byte sequence (big5, cp932, gbk, sjis) + + Parts of escaped multi-byte characters are scanned on different loop + iterations. See the comment about 0x5C handling in select_export::send_data() + in sql_class.cc. + + READ_INFO::read_field() does not check wellformedness. + Raising wellformedness errors or warnings in READ_INFO::read_field() + would be wrong, as the data after unescaping can go into a BLOB field, + or into a TEXT/VARCHAR field of a different character set. + The loop below only makes sure to revert escaping made by + select_export::send_data() or mysql_real_escape_string(). + Wellformedness is checked later, during Field::store(str,length,cs) time. + + Note, in some cases users can supply data which did not go through + escaping properly. 
For example, utf8 "\<C3><A4>" + (backslash followed by LATIN SMALL LETTER A WITH DIAERESIS) + is improperly escaped data that could not be generated by + select_export::send_data() / mysql_real_escape_string(): + - either there should be two backslashes: "\\<C3><A4>" + - or there should be no backslashes at all: "<C3><A4>" + "\<C3>" and "<A4>" are scanned on two different loop iterations and + store "<C3><A4>" into the field. + + Note, adding useless escapes before multi-byte characters like in the + example above is safe in case of utf8, but is not safe in case of + character sets that have escape_with_backslash_is_dangerous==TRUE, + such as big5, cp932, gbk, sjis. This can lead to misinterpretation of the + data. Suppose we have a big5 character "<EE><5C>" followed by <30> (digit 0). + If we add an extra escape before this sequence, then we'll get + <5C><EE><5C><30>. The first loop iteration will turn <5C><EE> into <EE>. + The second loop iteration will turn <5C><30> into <30>. + So the program that generates a dump file for further use with LOAD DATA + must make sure to use escapes properly. +*/ + int READ_INFO::read_field() { int chr,found_enclosed_char; |