diff options
Diffstat (limited to 'sql/sql_load.cc')
-rw-r--r-- | sql/sql_load.cc | 737 |
1 files changed, 652 insertions, 85 deletions
diff --git a/sql/sql_load.cc b/sql/sql_load.cc index 01961cec04a..b4f8b107f9b 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -1,5 +1,6 @@ /* - Copyright (c) 2000, 2012, Oracle and/or its affiliates. + Copyright (c) 2000, 2014, Oracle and/or its affiliates. + Copyright (c) 2010, 2014, Monty Progrm Ab This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -12,27 +13,63 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*/ + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* Copy data from a textfile to table */ -#include "mysql_priv.h" +/* 2006-12 Erik Wetterberg : LOAD XML added */ + +#include "sql_priv.h" +#include "unireg.h" +#include "sql_load.h" +#include "sql_load.h" +#include "sql_cache.h" // query_cache_* +#include "sql_base.h" // fill_record_n_invoke_before_triggers #include <my_dir.h> +#include "sql_view.h" // check_key_in_view +#include "sql_insert.h" // check_that_all_fields_are_given_values, + // prepare_triggers_for_insert_stmt, + // write_record +#include "sql_acl.h" // INSERT_ACL, UPDATE_ACL +#include "log_event.h" // Delete_file_log_event, + // Execute_load_query_log_event, + // LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F #include <m_ctype.h> #include "rpl_mi.h" #include "sql_repl.h" #include "sp_head.h" #include "sql_trigger.h" +#include "sql_derived.h" #include "sql_show.h" +class XML_TAG { +public: + int level; + String field; + String value; + XML_TAG(int l, String f, String v); +}; + + +XML_TAG::XML_TAG(int l, String f, String v) +{ + level= l; + field.append(f); + value.append(v); +} + + +#define GET (stack_pos != stack ? *--stack_pos : my_b_get(&cache)) +#define PUSH(A) *(stack_pos++)=(A) + class READ_INFO { File file; uchar *buffer, /* Buffer for read text */ *end_of_buff; /* Data in bufferts ends here */ uint buff_length, /* Length of buffert */ max_length; /* Max length of row */ - char *field_term_ptr,*line_term_ptr,*line_start_ptr,*line_start_end; + const uchar *field_term_ptr,*line_term_ptr; + const char *line_start_ptr,*line_start_end; uint field_term_length,line_term_length,enclosed_length; int field_term_char,line_term_char,enclosed_char,escape_char; int *stack,*stack_pos; @@ -40,6 +77,7 @@ class READ_INFO { bool need_end_io_cache; IO_CACHE cache; NET *io_net; + int level; /* for load xml */ public: bool error,line_cuted,found_null,enclosed; @@ -55,8 +93,14 @@ public: int read_fixed_length(void); int next_line(void); char unescape(char chr); - int terminator(char *ptr,uint length); + int terminator(const uchar *ptr, uint length); bool find_start_of_fields(); + /* load xml */ + List<XML_TAG> taglist; + int read_value(int delim, String *val); + int read_xml(); + int clear_level(int level); + /* We need to force cache close before destructor is invoked to log the last read block @@ -75,6 +119,15 @@ public: either the table or THD value */ void set_io_cache_arg(void* arg) { cache.arg = arg; } + + /** + skip all data till the eof. + */ + void skip_data_till_eof() + { + while (GET != my_b_EOF) + ; + } }; static int read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, @@ -87,14 +140,16 @@ static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, List<Item> &set_values, READ_INFO &read_info, String &enclosed, ulong skip_lines, bool ignore_check_option_errors); + +static int read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, + List<Item> &fields_vars, List<Item> &set_fields, + List<Item> &set_values, READ_INFO &read_info, + String &enclosed, ulong skip_lines, + bool ignore_check_option_errors); + #ifndef EMBEDDED_LIBRARY -static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex, - const char* db_arg, /* table's database */ - const char* table_name_arg, - enum enum_duplicates duplicates, - bool ignore, - bool transactional_table, - int errocode); +static bool write_execute_load_query_log_event(THD *, sql_exchange*, const + char*, const char*, bool, enum enum_duplicates, bool, bool, int); #endif /* EMBEDDED_LIBRARY */ /* @@ -134,6 +189,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, #ifndef EMBEDDED_LIBRARY LOAD_FILE_INFO lf_info; killed_state killed_status; + bool is_concurrent; #endif char *db = table_list->db; // This is never null /* @@ -152,7 +208,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, load data infile, so in mixed mode we go to row-based for avoiding the problem. */ - thd->set_current_stmt_binlog_row_based_if_mixed(); + thd->set_current_stmt_binlog_format_row_if_mixed(); #ifdef EMBEDDED_LIBRARY read_file_from_client = 0; //server is always in the same process @@ -175,7 +231,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, ER(WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED)); } - if (open_and_lock_tables(thd, table_list)) + if (open_and_lock_tables(thd, table_list, TRUE, 0)) DBUG_RETURN(TRUE); if (mysql_handle_single_derived(thd->lex, table_list, DT_MERGE_FOR_INSERT) || mysql_handle_single_derived(thd->lex, table_list, DT_PREPARE)) @@ -215,12 +271,21 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, table= table_list->table; transactional_table= table->file->has_transactions(); +#ifndef EMBEDDED_LIBRARY + is_concurrent= (table_list->lock_type == TL_WRITE_CONCURRENT_INSERT); +#endif if (!fields_vars.elements) { - Field **field; - for (field=table->field; *field ; field++) - fields_vars.push_back(new Item_field(*field)); + Field_iterator_table_ref field_iterator; + field_iterator.set(table_list); + for (; !field_iterator.end_of_fields(); field_iterator.next()) + { + Item *item; + if (!(item= field_iterator.create_item(thd))) + DBUG_RETURN(TRUE); + fields_vars.push_back(item->real_item()); + } bitmap_set_all(table->write_set); table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET; /* @@ -361,10 +426,9 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, // if we are not in slave thread, the file must be: if (!thd->slave_thread && - !((stat_info.st_mode & S_IROTH) == S_IROTH && // readable by others - (stat_info.st_mode & S_IFLNK) != S_IFLNK && // and not a symlink - ((stat_info.st_mode & S_IFREG) == S_IFREG || // and a regular file - (stat_info.st_mode & S_IFIFO) == S_IFIFO))) // or FIFO + !((stat_info.st_mode & S_IFLNK) != S_IFLNK && // symlink + ((stat_info.st_mode & S_IFREG) == S_IFREG || // regular file + (stat_info.st_mode & S_IFIFO) == S_IFIFO))) // named pipe { my_error(ER_TEXTFILE_NOT_READABLE, MYF(0), name); DBUG_RETURN(TRUE); @@ -372,8 +436,9 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, if ((stat_info.st_mode & S_IFIFO) == S_IFIFO) is_fifo= 1; #endif + if ((file= mysql_file_open(key_file_load, + name, O_RDONLY, MYF(MY_WME))) < 0) - if ((file=my_open(name,O_RDONLY,MYF(MY_WME))) < 0) DBUG_RETURN(TRUE); } @@ -391,8 +456,8 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, info.escape_char, read_file_from_client, is_fifo); if (read_info.error) { - if (file >= 0) - my_close(file,MYF(0)); // no files in net reading + if (file >= 0) + mysql_file_close(file, MYF(0)); // no files in net reading DBUG_RETURN(TRUE); // Can't allocate buffers } @@ -410,7 +475,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, thd->count_cuted_fields= CHECK_FIELD_WARN; /* calc cuted fields */ thd->cuted_fields=0L; /* Skip lines if there is a line terminator */ - if (ex->line_term->length()) + if (ex->line_term->length() && ex->filetype != FILETYPE_XML) { /* ex->skip_lines needs to be preserved for logging */ while (skip_lines > 0) @@ -432,7 +497,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, (!table->triggers || !table->triggers->has_delete_triggers())) table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE); - if (!thd->prelocked_mode) + if (thd->locked_tables_mode <= LTM_LOCK_TABLES) table->file->ha_start_bulk_insert((ha_rows) 0); table->copy_blobs=1; @@ -442,7 +507,11 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, MODE_STRICT_ALL_TABLES))); thd_progress_init(thd, 2); - if (!field_term->length() && !enclosed->length()) + if (ex->filetype == FILETYPE_XML) /* load xml */ + error= read_xml_field(thd, info, table_list, fields_vars, + set_fields, set_values, read_info, + *(ex->line_term), skip_lines, ignore); + else if (!field_term->length() && !enclosed->length()) error= read_fixed_length(thd, info, table_list, fields_vars, set_fields, set_values, read_info, skip_lines, ignore); @@ -450,10 +519,11 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, error= read_sep_field(thd, info, table_list, fields_vars, set_fields, set_values, read_info, *enclosed, skip_lines, ignore); - + thd_proc_info(thd, "End bulk insert"); thd_progress_next_stage(thd); - if (!thd->prelocked_mode && table->file->ha_end_bulk_insert() && !error) + if (thd->locked_tables_mode <= LTM_LOCK_TABLES && + table->file->ha_end_bulk_insert() && !error) { table->file->print_error(my_errno, MYF(0)); error= 1; @@ -463,7 +533,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, table->next_number_field=0; } if (file >= 0) - my_close(file,MYF(0)); + mysql_file_close(file, MYF(0)); free_blobs(table); /* if pack_blob was used */ table->copy_blobs=0; thd->count_cuted_fields= CHECK_FIELD_IGNORE; @@ -489,8 +559,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, if (error) { if (read_file_from_client) - while (!read_info.next_line()) - ; + read_info.skip_data_till_eof(); #ifndef EMBEDDED_LIBRARY if (mysql_bin_log.is_open()) @@ -530,6 +599,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, (void) write_execute_load_query_log_event(thd, ex, table_list->db, table_list->table_name, + is_concurrent, handle_duplicates, ignore, transactional_table, errcode); @@ -546,7 +616,8 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, goto err; } sprintf(name, ER(ER_LOAD_INFO), (ulong) info.records, (ulong) info.deleted, - (ulong) (info.records - info.copied), (ulong) thd->cuted_fields); + (ulong) (info.records - info.copied), + (ulong) thd->warning_info->statement_warn_count()); if (thd->transaction.stmt.modified_non_trans_table) thd->transaction.all.modified_non_trans_table= TRUE; @@ -561,8 +632,8 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, version for the binary log to mark that table maps are invalid after this point. */ - if (thd->current_stmt_binlog_row_based) - error= thd->binlog_flush_pending_rows_event(true); + if (thd->is_current_stmt_binlog_format_row()) + error= thd->binlog_flush_pending_rows_event(TRUE, transactional_table); else { /* @@ -576,6 +647,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, int errcode= query_error_code(thd, killed_status == NOT_KILLED); error= write_execute_load_query_log_event(thd, ex, table_list->db, table_list->table_name, + is_concurrent, handle_duplicates, ignore, transactional_table, errcode); @@ -611,6 +683,7 @@ err: static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex, const char* db_arg, /* table's database */ const char* table_name_arg, + bool is_concurrent, enum enum_duplicates duplicates, bool ignore, bool transactional_table, @@ -624,15 +697,12 @@ static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex, int n; const char *tdb= (thd->db != NULL ? thd->db : db_arg); const char *qualify_db= NULL; - char name_buffer[SAFE_NAME_LEN*2]; char command_buffer[1024]; - String string_buf(name_buffer, sizeof(name_buffer), - system_charset_info); String query_str(command_buffer, sizeof(command_buffer), system_charset_info); - Load_log_event lle(thd, ex, tdb, table_name_arg, fv, duplicates, - ignore, transactional_table); + Load_log_event lle(thd, ex, tdb, table_name_arg, fv, is_concurrent, + duplicates, ignore, transactional_table); /* force in a LOCAL if there was one in the original. @@ -667,10 +737,14 @@ static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex, { if (n++) query_str.append(", "); - if (item->name) + if (item->real_type() == Item::FIELD_ITEM) append_identifier(thd, &query_str, item->name, strlen(item->name)); else + { + /* Actually Item_user_var_as_out_param despite claiming STRING_ITEM. */ + DBUG_ASSERT(item->type() == Item::STRING_ITEM); ((Item_user_var_as_out_param *)item)->print_for_load(thd, &query_str); + } } query_str.append(")"); } @@ -680,17 +754,16 @@ static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex, List_iterator<Item> lu(thd->lex->update_list); List_iterator<Item> lv(thd->lex->value_list); - query_str.append(" SET "); + query_str.append(STRING_WITH_LEN(" SET ")); n= 0; while ((item= lu++)) { val= lv++; if (n++) - query_str.append(", "); + query_str.append(STRING_WITH_LEN(", ")); append_identifier(thd, &query_str, item->name, strlen(item->name)); - query_str.append("="); - val->print(&query_str, QT_ORDINARY); + query_str.append(val->name); } } @@ -702,7 +775,7 @@ static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex, (uint) (fname_start - 1), (uint) fname_end, (duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE : (ignore ? LOAD_DUP_IGNORE : LOAD_DUP_ERROR), - transactional_table, FALSE, errcode); + transactional_table, FALSE, FALSE, errcode); return mysql_bin_log.write(&e); } @@ -785,9 +858,10 @@ read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, if (pos == read_info.row_end) { thd->cuted_fields++; /* Not enough fields */ - push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_WARN_TOO_FEW_RECORDS, - ER(ER_WARN_TOO_FEW_RECORDS), thd->row_count); + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_WARN_TOO_FEW_RECORDS, + ER(ER_WARN_TOO_FEW_RECORDS), + thd->warning_info->current_row_for_warning()); if (!field->maybe_null() && field->type() == FIELD_TYPE_TIMESTAMP) ((Field_timestamp*) field)->set_time(); } @@ -808,9 +882,10 @@ read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, if (pos != read_info.row_end) { thd->cuted_fields++; /* To long row */ - push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_WARN_TOO_MANY_RECORDS, - ER(ER_WARN_TOO_MANY_RECORDS), thd->row_count); + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_WARN_TOO_MANY_RECORDS, + ER(ER_WARN_TOO_MANY_RECORDS), + thd->warning_info->current_row_for_warning()); } if (thd->killed || @@ -843,11 +918,12 @@ read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, if (read_info.line_cuted) { thd->cuted_fields++; /* To long row */ - push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_WARN_TOO_MANY_RECORDS, - ER(ER_WARN_TOO_MANY_RECORDS), thd->row_count); + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_WARN_TOO_MANY_RECORDS, + ER(ER_WARN_TOO_MANY_RECORDS), + thd->warning_info->current_row_for_warning()); } - thd->row_count++; + thd->warning_info->inc_current_row_for_warning(); continue_loop:; } DBUG_RETURN(test(read_info.error)); @@ -927,7 +1003,7 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, if (field->reset()) { my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0), field->field_name, - thd->row_count); + thd->warning_info->current_row_for_warning()); DBUG_RETURN(1); } field->set_null(); @@ -999,7 +1075,7 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, if (field->reset()) { my_error(ER_WARN_NULL_TO_NOTNULL, MYF(0),field->field_name, - thd->row_count); + thd->warning_info->current_row_for_warning()); DBUG_RETURN(1); } if (!field->maybe_null() && field->type() == FIELD_TYPE_TIMESTAMP) @@ -1013,7 +1089,8 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, thd->cuted_fields++; push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_WARN_TOO_FEW_RECORDS, - ER(ER_WARN_TOO_FEW_RECORDS), thd->row_count); + ER(ER_WARN_TOO_FEW_RECORDS), + thd->warning_info->current_row_for_warning()); } else if (item->type() == Item::STRING_ITEM) { @@ -1057,19 +1134,184 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, if (read_info.line_cuted) { thd->cuted_fields++; /* To long row */ - push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_WARN_TOO_MANY_RECORDS, ER(ER_WARN_TOO_MANY_RECORDS), - thd->row_count); + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_WARN_TOO_MANY_RECORDS, ER(ER_WARN_TOO_MANY_RECORDS), + thd->warning_info->current_row_for_warning()); if (thd->killed) DBUG_RETURN(1); } - thd->row_count++; + thd->warning_info->inc_current_row_for_warning(); continue_loop:; } DBUG_RETURN(test(read_info.error)); } +/**************************************************************************** +** Read rows in xml format +****************************************************************************/ +static int +read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, + List<Item> &fields_vars, List<Item> &set_fields, + List<Item> &set_values, READ_INFO &read_info, + String &row_tag, ulong skip_lines, + bool ignore_check_option_errors) +{ + List_iterator_fast<Item> it(fields_vars); + Item *item; + TABLE *table= table_list->table; + bool no_trans_update_stmt; + CHARSET_INFO *cs= read_info.read_charset; + DBUG_ENTER("read_xml_field"); + + no_trans_update_stmt= !table->file->has_transactions(); + + for ( ; ; it.rewind()) + { + if (thd->killed) + { + thd->send_kill_message(); + DBUG_RETURN(1); + } + + // read row tag and save values into tag list + if (read_info.read_xml()) + break; + + List_iterator_fast<XML_TAG> xmlit(read_info.taglist); + xmlit.rewind(); + XML_TAG *tag= NULL; + +#ifndef DBUG_OFF + DBUG_PRINT("read_xml_field", ("skip_lines=%d", (int) skip_lines)); + while ((tag= xmlit++)) + { + DBUG_PRINT("read_xml_field", ("got tag:%i '%s' '%s'", + tag->level, tag->field.c_ptr(), + tag->value.c_ptr())); + } +#endif + + restore_record(table, s->default_values); + + while ((item= it++)) + { + /* If this line is to be skipped we don't want to fill field or var */ + if (skip_lines) + continue; + + /* find field in tag list */ + xmlit.rewind(); + tag= xmlit++; + + while(tag && strcmp(tag->field.c_ptr(), item->name) != 0) + tag= xmlit++; + + if (!tag) // found null + { + if (item->type() == Item::FIELD_ITEM) + { + Field *field= ((Item_field *) item)->field; + field->reset(); + field->set_null(); + if (field == table->next_number_field) + table->auto_increment_field_not_null= TRUE; + if (!field->maybe_null()) + { + if (field->type() == FIELD_TYPE_TIMESTAMP) + ((Field_timestamp *) field)->set_time(); + else if (field != table->next_number_field) + field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, + ER_WARN_NULL_TO_NOTNULL, 1); + } + } + else + ((Item_user_var_as_out_param *) item)->set_null_value(cs); + continue; + } + + if (item->type() == Item::FIELD_ITEM) + { + + Field *field= ((Item_field *)item)->field; + field->set_notnull(); + if (field == table->next_number_field) + table->auto_increment_field_not_null= TRUE; + field->store((char *) tag->value.ptr(), tag->value.length(), cs); + } + else + ((Item_user_var_as_out_param *) item)->set_value( + (char *) tag->value.ptr(), + tag->value.length(), cs); + } + + if (read_info.error) + break; + + if (skip_lines) + { + skip_lines--; + continue; + } + + if (item) + { + /* Have not read any field, thus input file is simply ended */ + if (item == fields_vars.head()) + break; + + for ( ; item; item= it++) + { + if (item->type() == Item::FIELD_ITEM) + { + /* + QQ: We probably should not throw warning for each field. + But how about intention to always have the same number + of warnings in THD::cuted_fields (and get rid of cuted_fields + in the end ?) + */ + thd->cuted_fields++; + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_WARN_TOO_FEW_RECORDS, + ER(ER_WARN_TOO_FEW_RECORDS), + thd->warning_info->current_row_for_warning()); + } + else + ((Item_user_var_as_out_param *)item)->set_null_value(cs); + } + } + + if (thd->killed || + fill_record_n_invoke_before_triggers(thd, set_fields, set_values, + ignore_check_option_errors, + table->triggers, + TRG_EVENT_INSERT)) + DBUG_RETURN(1); + + switch (table_list->view_check_option(thd, + ignore_check_option_errors)) { + case VIEW_CHECK_SKIP: + read_info.next_line(); + goto continue_loop; + case VIEW_CHECK_ERROR: + DBUG_RETURN(-1); + } + + if (write_record(thd, table, &info)) + DBUG_RETURN(1); + + /* + We don't need to reset auto-increment field since we are restoring + its default value at the beginning of each loop iteration. + */ + thd->transaction.stmt.modified_non_trans_table= no_trans_update_stmt; + thd->warning_info->inc_current_row_for_warning(); + continue_loop:; + } + DBUG_RETURN(test(read_info.error) || thd->is_error()); +} /* load xml end */ + + /* Unescape all escape characters, mark \N as null */ char @@ -1101,14 +1343,23 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs, String &field_term, String &line_start, String &line_term, String &enclosed_par, int escape, bool get_it_from_net, bool is_fifo) - :file(file_par), buff_length(tot_length), escape_char(escape), + :file(file_par), buffer(NULL), buff_length(tot_length), escape_char(escape), found_end_of_line(false), eof(false), need_end_io_cache(false), error(false), line_cuted(false), found_null(false), read_charset(cs) { - field_term_ptr=(char*) field_term.ptr(); + /* + Field and line terminators must be interpreted as sequence of unsigned char. + Otherwise, non-ascii terminators will be negative on some platforms, + and positive on others (depending on the implementation of char). + */ + field_term_ptr= + static_cast<const uchar*>(static_cast<const void*>(field_term.ptr())); field_term_length= field_term.length(); - line_term_ptr=(char*) line_term.ptr(); + line_term_ptr= + static_cast<const uchar*>(static_cast<const void*>(line_term.ptr())); line_term_length= line_term.length(); + + level= 0; /* for load xml */ if (line_start.length() == 0) { line_start_ptr=0; @@ -1116,7 +1367,7 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs, } else { - line_start_ptr=(char*) line_start.ptr(); + line_start_ptr= line_start.ptr(); line_start_end=line_start_ptr+line_start.length(); start_of_line= 1; } @@ -1125,12 +1376,12 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs, !memcmp(field_term_ptr,line_term_ptr,field_term_length)) { line_term_length=0; - line_term_ptr=(char*) ""; + line_term_ptr= NULL; } enclosed_char= (enclosed_length=enclosed_par.length()) ? (uchar) enclosed_par[0] : INT_MAX; - field_term_char= field_term_length ? (uchar) field_term_ptr[0] : INT_MAX; - line_term_char= line_term_length ? (uchar) line_term_ptr[0] : INT_MAX; + field_term_char= field_term_length ? field_term_ptr[0] : INT_MAX; + line_term_char= line_term_length ? line_term_ptr[0] : INT_MAX; /* Set of a stack for unget if long terminators */ uint length= max(cs->mbmaxlen, max(field_term_length, line_term_length)) + 1; @@ -1147,8 +1398,8 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs, (is_fifo ? READ_FIFO : READ_CACHE),0L,1, MYF(MY_WME))) { - my_free((uchar*) buffer,MYF(0)); /* purecov: inspected */ - buffer= 0; + my_free(buffer); /* purecov: inspected */ + buffer= NULL; error=1; } else @@ -1177,21 +1428,21 @@ READ_INFO::~READ_INFO() { if (need_end_io_cache) ::end_io_cache(&cache); - my_free(buffer, MYF(MY_ALLOW_ZERO_PTR)); + my_free(buffer); + List_iterator<XML_TAG> xmlit(taglist); + XML_TAG *t; + while ((t= xmlit++)) + delete(t); } -#define GET (stack_pos != stack ? *--stack_pos : my_b_get(&cache)) -#define PUSH(A) *(stack_pos++)=(A) - - -inline int READ_INFO::terminator(char *ptr,uint length) +inline int READ_INFO::terminator(const uchar *ptr,uint length) { int chr=0; // Keep gcc happy uint i; for (i=1 ; i < length ; i++) { - if ((chr=GET) != *++ptr) + if ((chr=GET) != *(uchar*)++ptr) { break; } @@ -1200,7 +1451,7 @@ inline int READ_INFO::terminator(char *ptr,uint length) return 1; PUSH(chr); while (i-- > 1) - PUSH((uchar) *--ptr); + PUSH(*--ptr); return 0; } @@ -1332,7 +1583,7 @@ int READ_INFO::read_field() if (my_mbcharlen(read_charset, chr) > 1 && to + my_mbcharlen(read_charset, chr) <= end_of_buff) { - uchar* p= (uchar*) to; + uchar* p= to; int ml, i; *to++ = chr; @@ -1357,7 +1608,7 @@ int READ_INFO::read_field() (const char *)to)) continue; for (i= 0; i < ml; i++) - PUSH((uchar) *--to); + PUSH(*--to); chr= GET; } #endif @@ -1506,7 +1757,7 @@ bool READ_INFO::find_start_of_fields() return 1; } } while ((char) chr != line_start_ptr[0]); - for (char *ptr=line_start_ptr+1 ; ptr != line_start_end ; ptr++) + for (const char *ptr=line_start_ptr+1 ; ptr != line_start_end ; ptr++) { chr=GET; // Eof will be checked later if ((char) chr != *ptr) @@ -1514,10 +1765,326 @@ bool READ_INFO::find_start_of_fields() PUSH(chr); while (--ptr != line_start_ptr) { // Restart with next char - PUSH((uchar) *ptr); + PUSH( *ptr); } goto try_again; } } return 0; } + + +/* + Clear taglist from tags with a specified level +*/ +int READ_INFO::clear_level(int level_arg) +{ + DBUG_ENTER("READ_INFO::read_xml clear_level"); + List_iterator<XML_TAG> xmlit(taglist); + xmlit.rewind(); + XML_TAG *tag; + + while ((tag= xmlit++)) + { + if(tag->level >= level_arg) + { + xmlit.remove(); + delete tag; + } + } + DBUG_RETURN(0); +} + + +/* + Convert an XML entity to Unicode value. + Return -1 on error; +*/ +static int +my_xml_entity_to_char(const char *name, uint length) +{ + if (length == 2) + { + if (!memcmp(name, "gt", length)) + return '>'; + if (!memcmp(name, "lt", length)) + return '<'; + } + else if (length == 3) + { + if (!memcmp(name, "amp", length)) + return '&'; + } + else if (length == 4) + { + if (!memcmp(name, "quot", length)) + return '"'; + if (!memcmp(name, "apos", length)) + return '\''; + } + return -1; +} + + +/** + @brief Convert newline, linefeed, tab to space + + @param chr character + + @details According to the "XML 1.0" standard, + only space (#x20) characters, carriage returns, + line feeds or tabs are considered as spaces. + Convert all of them to space (#x20) for parsing simplicity. +*/ +static int +my_tospace(int chr) +{ + return (chr == '\t' || chr == '\r' || chr == '\n') ? ' ' : chr; +} + + +/* + Read an xml value: handle multibyte and xml escape +*/ +int READ_INFO::read_value(int delim, String *val) +{ + int chr; + String tmp; + + for (chr= GET; my_tospace(chr) != delim && chr != my_b_EOF;) + { +#ifdef USE_MB + if (my_mbcharlen(read_charset, chr) > 1) + { + DBUG_PRINT("read_xml",("multi byte")); + int i, ml= my_mbcharlen(read_charset, chr); + for (i= 1; i < ml; i++) + { + val->append(chr); + /* + Don't use my_tospace() in the middle of a multi-byte character + TODO: check that the multi-byte sequence is valid. + */ + chr= GET; + if (chr == my_b_EOF) + return chr; + } + } +#endif + if(chr == '&') + { + tmp.length(0); + for (chr= my_tospace(GET) ; chr != ';' ; chr= my_tospace(GET)) + { + if (chr == my_b_EOF) + return chr; + tmp.append(chr); + } + if ((chr= my_xml_entity_to_char(tmp.ptr(), tmp.length())) >= 0) + val->append(chr); + else + { + val->append('&'); + val->append(tmp); + val->append(';'); + } + } + else + val->append(chr); + chr= GET; + } + return my_tospace(chr); +} + + +/* + Read a record in xml format + tags and attributes are stored in taglist + when tag set in ROWS IDENTIFIED BY is closed, we are ready and return +*/ +int READ_INFO::read_xml() +{ + DBUG_ENTER("READ_INFO::read_xml"); + int chr, chr2, chr3; + int delim= 0; + String tag, attribute, value; + bool in_tag= false; + + tag.length(0); + attribute.length(0); + value.length(0); + + for (chr= my_tospace(GET); chr != my_b_EOF ; ) + { + switch(chr){ + case '<': /* read tag */ + /* TODO: check if this is a comment <!-- comment --> */ + chr= my_tospace(GET); + if(chr == '!') + { + chr2= GET; + chr3= GET; + + if(chr2 == '-' && chr3 == '-') + { + chr2= 0; + chr3= 0; + chr= my_tospace(GET); + + while(chr != '>' || chr2 != '-' || chr3 != '-') + { + if(chr == '-') + { + chr3= chr2; + chr2= chr; + } + else if (chr2 == '-') + { + chr2= 0; + chr3= 0; + } + chr= my_tospace(GET); + if (chr == my_b_EOF) + goto found_eof; + } + break; + } + } + + tag.length(0); + while(chr != '>' && chr != ' ' && chr != '/' && chr != my_b_EOF) + { + if(chr != delim) /* fix for the '<field name =' format */ + tag.append(chr); + chr= my_tospace(GET); + } + + // row tag should be in ROWS IDENTIFIED BY '<row>' - stored in line_term + if((tag.length() == line_term_length -2) && + (memcmp(tag.ptr(), line_term_ptr + 1, tag.length()) == 0)) + { + DBUG_PRINT("read_xml", ("start-of-row: %i %s %s", + level,tag.c_ptr_safe(), line_term_ptr)); + } + + if(chr == ' ' || chr == '>') + { + level++; + clear_level(level + 1); + } + + if (chr == ' ') + in_tag= true; + else + in_tag= false; + break; + + case ' ': /* read attribute */ + while(chr == ' ') /* skip blanks */ + chr= my_tospace(GET); + + if(!in_tag) + break; + + while(chr != '=' && chr != '/' && chr != '>' && chr != my_b_EOF) + { + attribute.append(chr); + chr= my_tospace(GET); + } + break; + + case '>': /* end tag - read tag value */ + in_tag= false; + chr= read_value('<', &value); + if(chr == my_b_EOF) + goto found_eof; + + /* save value to list */ + if(tag.length() > 0 && value.length() > 0) + { + DBUG_PRINT("read_xml", ("lev:%i tag:%s val:%s", + level,tag.c_ptr_safe(), value.c_ptr_safe())); + taglist.push_front( new XML_TAG(level, tag, value)); + } + tag.length(0); + value.length(0); + attribute.length(0); + break; + + case '/': /* close tag */ + level--; + chr= my_tospace(GET); + if(chr != '>') /* if this is an empty tag <tag /> */ + tag.length(0); /* we should keep tag value */ + while(chr != '>' && chr != my_b_EOF) + { + tag.append(chr); + chr= my_tospace(GET); + } + + if((tag.length() == line_term_length -2) && + (memcmp(tag.ptr(), line_term_ptr + 1, tag.length()) == 0)) + { + DBUG_PRINT("read_xml", ("found end-of-row %i %s", + level, tag.c_ptr_safe())); + DBUG_RETURN(0); //normal return + } + chr= my_tospace(GET); + break; + + case '=': /* attribute name end - read the value */ + //check for tag field and attribute name + if(!memcmp(tag.c_ptr_safe(), STRING_WITH_LEN("field")) && + !memcmp(attribute.c_ptr_safe(), STRING_WITH_LEN("name"))) + { + /* + this is format <field name="xx">xx</field> + where actual fieldname is in attribute + */ + delim= my_tospace(GET); + tag.length(0); + attribute.length(0); + chr= '<'; /* we pretend that it is a tag */ + level--; + break; + } + + //check for " or ' + chr= GET; + if (chr == my_b_EOF) + goto found_eof; + if(chr == '"' || chr == '\'') + { + delim= chr; + } + else + { + delim= ' '; /* no delimiter, use space */ + PUSH(chr); + } + + chr= read_value(delim, &value); + if(attribute.length() > 0 && value.length() > 0) + { + DBUG_PRINT("read_xml", ("lev:%i att:%s val:%s\n", + level + 1, + attribute.c_ptr_safe(), + value.c_ptr_safe())); + taglist.push_front(new XML_TAG(level + 1, attribute, value)); + } + attribute.length(0); + value.length(0); + if (chr != ' ') + chr= my_tospace(GET); + break; + + default: + chr= my_tospace(GET); + } /* end switch */ + } /* end while */ + +found_eof: + DBUG_PRINT("read_xml",("Found eof")); + eof= 1; + DBUG_RETURN(1); +} |