1 files changed, 50 insertions, 1 deletions
diff --git a/sql/sql_load.cc b/sql/sql_load.cc
index 509df96e89d..c25e73e7346 100644
--- a/sql/sql_load.cc
+++ b/sql/sql_load.cc
@@ -616,7 +616,8 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
 			    *enclosed, skip_lines, ignore);
 
     thd_proc_info(thd, "End bulk insert");
-    thd_progress_next_stage(thd);
+    if (!error)
+      thd_progress_next_stage(thd);
     if (thd->locked_tables_mode <= LTM_LOCK_TABLES &&
         table->file->ha_end_bulk_insert() && !error)
     {
@@ -1525,6 +1526,54 @@ inline bool READ_INFO::terminator(const uchar *ptr, uint length)
 }
 
 
+/**
+  Read a field.
+
+  The data in the loaded file was presumably escaped using
+  - either select_export::send_data() (SELECT ... INTO OUTFILE)
+  - or mysql_real_escape_string()
+  using the same character set as the one specified in the current
+  "LOAD DATA INFILE ... CHARACTER SET ..." (or the default LOAD character set).
+
+  Note, non-escaped multi-byte characters are scanned as a single entity.
+  This is needed to correctly distinguish between:
+  - 0x5C as an escape character versus
+  - 0x5C as the second byte in a multi-byte sequence (big5, cp932, gbk, sjis)
+
+  Parts of escaped multi-byte characters are scanned on different loop
+  iterations. See the comment about 0x5C handling in select_export::send_data()
+  in sql_class.cc.
+
+  READ_INFO::read_field() does not check wellformedness.
+  Raising wellformedness errors or warnings in READ_INFO::read_field()
+  would be wrong, as the data after unescaping can go into a BLOB field,
+  or into a TEXT/VARCHAR field of a different character set.
+  The loop below only makes sure to revert escaping made by
+  select_export::send_data() or mysql_real_escape_string().
+  Wellformedness is checked later, during Field::store(str,length,cs) time.
+
+  Note, in some cases users can supply data which did not go through
+  escaping properly. For example, utf8 "\<C3><A4>"
+  (backslash followed by LATIN SMALL LETTER A WITH DIAERESIS)
+  is improperly escaped data that could not be generated by
+  select_export::send_data() / mysql_real_escape_string():
+  - either there should be two backslashes:   "\\<C3><A4>"
+  - or there should be no backslashes at all: "<C3><A4>"
+  "\<C3>" and "<A4>" are scanned on two different loop iterations and
+  store "<C3><A4>" into the field.
+
+  Note, adding useless escapes before multi-byte characters like in the
+  example above is safe in case of utf8, but is not safe in case of
+  character sets that have escape_with_backslash_is_dangerous==TRUE,
+  such as big5, cp932, gbk, sjis. This can lead to mis-interpretation of the
+  data. Suppose we have a big5 character "<EE><5C>" followed by <30> (digit 0).
+  If we add an extra escape before this sequence, then we'll get
+  <5C><EE><5C><30>. The first loop iteration will turn <5C><EE> into <EE>.
+  The second loop iteration will turn <5C><30> into <30>.
+  So the program that generates a dump file for further use with LOAD DATA
+  must make sure to use escapes properly.
+*/
+
 int READ_INFO::read_field()
 {
   int chr,found_enclosed_char;