summaryrefslogtreecommitdiff
path: root/sql/sql_load.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/sql_load.cc')
-rw-r--r--sql/sql_load.cc51
1 files changed, 50 insertions, 1 deletions
diff --git a/sql/sql_load.cc b/sql/sql_load.cc
index 509df96e89d..c25e73e7346 100644
--- a/sql/sql_load.cc
+++ b/sql/sql_load.cc
@@ -616,7 +616,8 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
*enclosed, skip_lines, ignore);
thd_proc_info(thd, "End bulk insert");
- thd_progress_next_stage(thd);
+ if (!error)
+ thd_progress_next_stage(thd);
if (thd->locked_tables_mode <= LTM_LOCK_TABLES &&
table->file->ha_end_bulk_insert() && !error)
{
@@ -1525,6 +1526,54 @@ inline bool READ_INFO::terminator(const uchar *ptr, uint length)
}
+/**
+ Read a field.
+
+ The data in the loaded file was presumably escaped using
+ - either select_export::send_data() OUTFILE
+ - or mysql_real_escape_string()
+ using the same character set with the one specified in the current
+ "LOAD DATA INFILE ... CHARACTER SET ..." (or the default LOAD character set).
+
+ Note, non-escaped multi-byte characters are scanned as a single entity.
+ This is needed to correctly distinguish between:
+ - 0x5C as an escape character versus
+ - 0x5C as the second byte in a multi-byte sequence (big5, cp932, gbk, sjis)
+
+ Parts of escaped multi-byte characters are scanned on different loop
+ iterations. See the comment about 0x5C handling in select_export::send_data()
+ in sql_class.cc.
+
+ READ_INFO::read_field() does not check wellformedness.
+ Raising wellformedness errors or warnings in READ_INFO::read_field()
+ would be wrong, as the data after unescaping can go into a BLOB field,
+ or into a TEXT/VARCHAR field of a different character set.
+ The loop below only makes sure to revert escaping made by
+ select_export::send_data() or mysql_real_escape_string().
+ Wellformedness is checked later, during Field::store(str,length,cs) time.
+
+ Note, in some cases users can supply data which did not go through
+ escaping properly. For example, utf8 "\<C3><A4>"
+ (backslash followed by LATIN SMALL LETTER A WITH DIAERESIS)
+ is improperly escaped data that could not be generated by
+ select_export::send_data() / mysql_real_escape_string():
+ - either there should be two backslashes: "\\<C3><A4>"
+ - or there should be no backslashes at all: "<C3><A4>"
+ "\<C3>" and "<A4> are scanned on two different loop iterations and
+ store "<C3><A4>" into the field.
+
+ Note, adding useless escapes before multi-byte characters like in the
+ example above is safe in case of utf8, but is not safe in case of
+ character sets that have escape_with_backslash_is_dangerous==TRUE,
+ such as big5, cp932, gbk, sjis. This can lead to mis-interpretation of the
+ data. Suppose we have a big5 character "<EE><5C>" followed by <30> (digit 0).
+ If we add an extra escape before this sequence, then we'll get
+ <5C><EE><5C><30>. The first loop iteration will turn <5C><EE> into <EE>.
+ The second loop iteration will turn <5C><30> into <30>.
+ So the program that generates a dump file for further use with LOAD DATA
+ must make sure to use escapes properly.
+*/
+
int READ_INFO::read_field()
{
int chr,found_enclosed_char;