diff options
author | Alexander Barkov <bar@mysql.com> | 2009-10-12 11:22:53 +0500 |
---|---|---|
committer | Alexander Barkov <bar@mysql.com> | 2009-10-12 11:22:53 +0500 |
commit | 93c885bd36569b5b2bd7c85e497ecf9830e848de (patch) | |
tree | 03278b18508c3d68f40753e2ad54f2f6508d63e2 | |
parent | 829525756e7a769b0c0de716f6207f5f21daa994 (diff) | |
download | mariadb-git-93c885bd36569b5b2bd7c85e497ecf9830e848de.tar.gz |
WL#1397 convert XML -> SQL
-rw-r--r-- | mysql-test/r/loadxml.result | 79 | ||||
-rw-r--r-- | mysql-test/std_data/loadxml.dat | 64 | ||||
-rw-r--r-- | mysql-test/std_data/loadxml2.dat | 19 | ||||
-rw-r--r-- | mysql-test/t/loadxml.test | 106 | ||||
-rw-r--r-- | sql/lex.h | 1 | ||||
-rw-r--r-- | sql/sql_class.cc | 8 | ||||
-rw-r--r-- | sql/sql_class.h | 5 | ||||
-rw-r--r-- | sql/sql_load.cc | 527 | ||||
-rw-r--r-- | sql/sql_yacc.yy | 84 |
9 files changed, 859 insertions, 34 deletions
diff --git a/mysql-test/r/loadxml.result b/mysql-test/r/loadxml.result new file mode 100644 index 00000000000..dc2392e5593 --- /dev/null +++ b/mysql-test/r/loadxml.result @@ -0,0 +1,79 @@ +drop table if exists t1, t2; +create table t1 (a int, b varchar(64)); +-- Load a static XML file +load xml infile '../../std_data/loadxml.dat' into table t1 +rows identified by '<row>'; +select * from t1 order by a; +a b +1 b1 +2 b2 +3 b3 +11 b11 +111 b111 +112 b112 & < > " ' &unknown; -- check entities +212 b212 +213 b213 +214 b214 +215 b215 +216 &bb b; +delete from t1; +-- Load a static XML file with 'IGNORE num ROWS' +load xml infile '../../std_data/loadxml.dat' into table t1 +rows identified by '<row>' ignore 4 rows; +select * from t1 order by a; +a b +111 b111 +112 b112 & < > " ' &unknown; -- check entities +212 b212 +213 b213 +214 b214 +215 b215 +216 &bb b; +-- Check 'mysqldump --xml' + 'LOAD XML' round trip +delete from t1; +load xml infile 'MYSQLTEST_VARDIR/tmp/loadxml-dump.xml' into table t1 rows identified by '<row>';; +select * from t1 order by a; +a b +111 b111 +112 b112 & < > " ' &unknown; -- check entities +212 b212 +213 b213 +214 b214 +215 b215 +216 &bb b; +--Check that default row tag is '<row> +delete from t1; +load xml infile 'MYSQLTEST_VARDIR/tmp/loadxml-dump.xml' into table t1;; +select * from t1 order by a; +a b +111 b111 +112 b112 & < > " ' &unknown; -- check entities +212 b212 +213 b213 +214 b214 +215 b215 +216 &bb b; +-- Check that 'xml' is not a keyword +select 1 as xml; +xml +1 +create table t2(fl text); +LOAD XML LOCAL INFILE "$MYSQLTEST_VARDIR/tmp/loadxml-dump.xml" INTO TABLE t2 ROWS IDENTIFIED BY '<person>';; +show processlist; +Id User Host db Command Time State Info +2 root localhost test Query 0 NULL show processlist +5 root localhost test Query 3 Reading from net LOAD XML LOCAL INFILE "$MYSQLTEST_VARDIR/tmp/loadxml-dump.xml" INTO TABLE t2 ROWS IDENTIFIED BY '<p +drop table t1; +drop table t2; +create table t1 ( +id int(11) not null, +text text, +primary key (id) +) engine=MyISAM default charset=latin1; +load xml infile '../../std_data/loadxml2.dat' into table t1; +select * from t1; +id text +1 line1
+line2
+line3 +drop table t1; diff --git a/mysql-test/std_data/loadxml.dat b/mysql-test/std_data/loadxml.dat new file mode 100644 index 00000000000..b72fac9da74 --- /dev/null +++ b/mysql-test/std_data/loadxml.dat @@ -0,0 +1,64 @@ +<?xml version="1.0"?> +<mysqldump xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> +<database name="test"> + <table_structure name="t1"> + <field Field="a" Type="int(11)" Null="YES" Key="" Extra="" /> + <field Field="b" Type="varchar(128)" Null="YES" Key="" Extra="" /> + <options Name="t1" Engine="MyISAM" Version="10" Row_format="Dynamic" Rows="3" Avg_row_length="20" Data_length="60" Max_data_length="281474976710655" Index_length="1024" Data_free="0" Create_time="2007-02-09 09:08:36" Update_time="2007-02-09 09:08:54" Collation="latin1_swedish_ci" Create_options="" Comment="" /> + </table_structure> + <table_data name="t1"> + <row> + <field name="a">1</field> + <field name="b">b1</field> + </row> + <row> + <field name="a">2</field> + <field name="b">b2</field> + </row> + <row> + <field name="a">3</field> + <field name="b">b3</field> + </row> + <row> + <field name="a">11</field> + <field name="b">b11</field> + </row> + + <!-- Check field values as tags --> + <row> + <a>111</a> + <b>b111</b> + </row> + + <row> + <a>112</a> + <b>b112 & < > " ' &unknown; -- check entities</b> + </row> + + + <!-- Check field values in attributes --> + <row a=212 b="b212"></row> + + <!-- Bug#29752 Linefeeds break LOAD XML INFILE --> + <!-- Check varios combinations of TAB and NL --> + + <row + a=213 b="b213"> + </row> + + <row + a=214 + b="b214"> + </row> + + <row a=215 b="b215"></row> + + <row a=216 b="&bb +b;"></row> + + <!-- End of bug#29752 --> + + </table_data> +</database> +</mysqldump> + diff --git a/mysql-test/std_data/loadxml2.dat b/mysql-test/std_data/loadxml2.dat new file mode 100644 index 00000000000..f0d966cf9b6 --- /dev/null +++ b/mysql-test/std_data/loadxml2.dat @@ -0,0 +1,19 @@ +<?xml version="1.0"?> +<mysqldump xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> +<database name="test_of_xml_import"> + <table_structure name="t1"> + <field Field="id" Type="int(11)" Null="NO" Key="PRI" Extra="" /> + <field Field="text" Type="text" Null="YES" Key="" Extra="" /> + <key Table="t1" Non_unique="0" Key_name="PRIMARY" Seq_in_index="1" Column_name="id" Collation="A" Cardinality="1" Null="" Index_type="BTREE" Comment="" Index_Comment="" /> + <options Name="t1" Engine="MyISAM" Version="10" Row_format="Dynamic" Rows="1" Avg_row_length="32" Data_length="32" Max_data_length="281474976710655" Index_length="2048" Data_free="0" Create_time="2009-06-18 10:02:37" Update_time="2009-06-18 10:02:43" Collation="latin1_swedish_ci" Create_options="" Comment="" /> + </table_structure> + <table_data name="t1"> + <row> + <field name="id">1</field> + <field name="text">line1
+line2
+line3</field> + </row> + </table_data> +</database> +</mysqldump> diff --git a/mysql-test/t/loadxml.test b/mysql-test/t/loadxml.test new file mode 100644 index 00000000000..bb088559ae2 --- /dev/null +++ b/mysql-test/t/loadxml.test @@ -0,0 +1,106 @@ +# +# Tests for "LOAD XML" - a contributed patch from Erik Wetterberg. +# + +# Running the $MYSQL_DUMP tool against an embedded server does not work. +--source include/not_embedded.inc + +--disable_warnings +drop table if exists t1, t2; +--enable_warnings + +create table t1 (a int, b varchar(64)); + + +--echo -- Load a static XML file +load xml infile '../../std_data/loadxml.dat' into table t1 +rows identified by '<row>'; +select * from t1 order by a; +delete from t1; + + +--echo -- Load a static XML file with 'IGNORE num ROWS' +load xml infile '../../std_data/loadxml.dat' into table t1 +rows identified by '<row>' ignore 4 rows; +select * from t1 order by a; + + +--echo -- Check 'mysqldump --xml' + 'LOAD XML' round trip +--exec $MYSQL_DUMP --xml test t1 > "$MYSQLTEST_VARDIR/tmp/loadxml-dump.xml" 2>&1 +delete from t1; +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--eval load xml infile '$MYSQLTEST_VARDIR/tmp/loadxml-dump.xml' into table t1 rows identified by '<row>'; +select * from t1 order by a; + +--echo --Check that default row tag is '<row> +delete from t1; +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +--eval load xml infile '$MYSQLTEST_VARDIR/tmp/loadxml-dump.xml' into table t1; +select * from t1 order by a; + +--echo -- Check that 'xml' is not a keyword +select 1 as xml; + + +# +# Bug #42520 killing load .. infile Assertion failed: ! is_set(), file .\sql_error.cc, line 8 +# + +--disable_query_log +delete from t1; +insert into t1 values (1, '12345678900987654321'), (2, 'asdfghjkl;asdfghjkl;'); +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +insert into t1 select * from t1; +--exec $MYSQL_DUMP --xml test t1 > "$MYSQLTEST_VARDIR/tmp/loadxml-dump.xml" 2>&1 +--enable_query_log + +connect (addconroot, localhost, root,,); +connection addconroot; +create table t2(fl text); +--let $PSEUDO_THREAD_ID=`select @@pseudo_thread_id ` + +--send LOAD XML LOCAL INFILE "$MYSQLTEST_VARDIR/tmp/loadxml-dump.xml" INTO TABLE t2 ROWS IDENTIFIED BY '<person>'; + +sleep 3; + + +connection default; +show processlist; + +--disable_query_log +--eval kill $PSEUDO_THREAD_ID +--enable_query_log + +disconnect addconroot; + +# +# Clean up +# +remove_file $MYSQLTEST_VARDIR/tmp/loadxml-dump.xml; +drop table t1; +drop table t2; + +# +# Bug #36750 LOAD XML doesn't understand new line (feed) characters in multi line text fields +# + +create table t1 ( + id int(11) not null, + text text, + primary key (id) +) engine=MyISAM default charset=latin1; +load xml infile '../../std_data/loadxml2.dat' into table t1; +select * from t1; +drop table t1; + diff --git a/sql/lex.h b/sql/lex.h index 268c77a4f98..b4945bbfd1a 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -607,6 +607,7 @@ static SYMBOL symbols[] = { { "X509", SYM(X509_SYM)}, { "XOR", SYM(XOR)}, { "XA", SYM(XA_SYM)}, + { "XML", SYM(XML_SYM)}, /* LOAD XML Arnold/Erik */ { "YEAR", SYM(YEAR_SYM)}, { "YEAR_MONTH", SYM(YEAR_MONTH_SYM)}, { "ZEROFILL", SYM(ZEROFILL)}, diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 01a1fff3048..11692c2fd9f 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -1602,13 +1602,17 @@ bool select_result::check_simple_select() const static String default_line_term("\n",default_charset_info); static String default_escaped("\\",default_charset_info); static String default_field_term("\t",default_charset_info); +static String default_xml_row_term("<row>", default_charset_info); -sql_exchange::sql_exchange(char *name,bool flag) +sql_exchange::sql_exchange(char *name, bool flag, + enum enum_filetype filetype_arg) :file_name(name), opt_enclosed(0), dumpfile(flag), skip_lines(0) { + filetype= filetype_arg; field_term= &default_field_term; enclosed= line_start= &my_empty_string; - line_term= &default_line_term; + line_term= filetype == FILETYPE_CSV ? + &default_line_term : &default_xml_row_term; escaped= &default_escaped; cs= NULL; } diff --git a/sql/sql_class.h b/sql/sql_class.h index 4874801e380..5dcf3fdaad0 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -89,6 +89,7 @@ enum enum_slave_exec_mode { SLAVE_EXEC_MODE_STRICT, SLAVE_EXEC_MODE_LAST_BIT}; enum enum_mark_columns { MARK_COLUMNS_NONE, MARK_COLUMNS_READ, MARK_COLUMNS_WRITE}; +enum enum_filetype { FILETYPE_CSV, FILETYPE_XML }; extern char internal_table_name[2]; extern char empty_c_string[1]; @@ -2364,13 +2365,15 @@ my_eof(THD *thd) class sql_exchange :public Sql_alloc { public: + enum enum_filetype filetype; /* load XML, Added by Arnold & Erik */ char *file_name; String *field_term,*enclosed,*line_term,*line_start,*escaped; bool opt_enclosed; bool dumpfile; ulong skip_lines; CHARSET_INFO *cs; - sql_exchange(char *name,bool dumpfile_flag); + sql_exchange(char *name, bool dumpfile_flag, + enum_filetype filetype_arg= FILETYPE_CSV); bool escaped_given(void); }; diff --git a/sql/sql_load.cc b/sql/sql_load.cc index 079b6f2fe43..847417a31ed 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -15,6 +15,8 @@ /* Copy data from a textfile to table */ +/* 2006-12 Erik Wetterberg : LOAD XML added */ + #include "mysql_priv.h" #include <my_dir.h> #include <m_ctype.h> @@ -23,6 +25,23 @@ #include "sp_head.h" #include "sql_trigger.h" +class XML_TAG { +public: + int level; + String field; + String value; + XML_TAG(int l, String f, String v); +}; + + +XML_TAG::XML_TAG(int l, String f, String v) +{ + level= l; + field.append(f); + value.append(v); +} + + class READ_INFO { File file; uchar *buffer, /* Buffer for read text */ @@ -37,6 +56,7 @@ class READ_INFO { bool need_end_io_cache; IO_CACHE cache; NET *io_net; + int level; /* for load xml */ public: bool error,line_cuted,found_null,enclosed; @@ -54,6 +74,12 @@ public: char unescape(char chr); int terminator(char *ptr,uint length); bool find_start_of_fields(); + /* load xml */ + List<XML_TAG> taglist; + int read_value(int delim, String *val); + int read_xml(); + int clear_level(int level); + /* We need to force cache close before destructor is invoked to log the last read block @@ -82,6 +108,13 @@ static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, List<Item> &set_values, READ_INFO &read_info, String &enclosed, ulong skip_lines, bool ignore_check_option_errors); + +static int read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, + List<Item> &fields_vars, List<Item> &set_fields, + List<Item> &set_values, READ_INFO &read_info, + String &enclosed, ulong skip_lines, + bool ignore_check_option_errors); + #ifndef EMBEDDED_LIBRARY static bool write_execute_load_query_log_event(THD *thd, bool duplicates, bool ignore, @@ -390,7 +423,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, thd->count_cuted_fields= CHECK_FIELD_WARN; /* calc cuted fields */ thd->cuted_fields=0L; /* Skip lines if there is a line terminator */ - if (ex->line_term->length()) + if (ex->line_term->length() && ex->filetype != FILETYPE_XML) { /* ex->skip_lines needs to be preserved for logging */ while (skip_lines > 0) @@ -421,7 +454,11 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, (MODE_STRICT_TRANS_TABLES | MODE_STRICT_ALL_TABLES))); - if (!field_term->length() && !enclosed->length()) + if (ex->filetype == FILETYPE_XML) /* load xml */ + error= read_xml_field(thd, info, table_list, fields_vars, + set_fields, set_values, read_info, + *(ex->line_term), skip_lines, ignore); + else if (!field_term->length() && !enclosed->length()) error= read_fixed_length(thd, info, table_list, fields_vars, set_fields, set_values, read_info, skip_lines, ignore); @@ -917,6 +954,171 @@ continue_loop:; } +/**************************************************************************** +** Read rows in xml format +****************************************************************************/ +static int +read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list, + List<Item> &fields_vars, List<Item> &set_fields, + List<Item> &set_values, READ_INFO &read_info, + String &row_tag, ulong skip_lines, + bool ignore_check_option_errors) +{ + List_iterator_fast<Item> it(fields_vars); + Item *item; + TABLE *table= table_list->table; + bool no_trans_update_stmt; + CHARSET_INFO *cs= read_info.read_charset; + DBUG_ENTER("read_xml_field"); + + no_trans_update_stmt= !table->file->has_transactions(); + + for ( ; ; it.rewind()) + { + if (thd->killed) + { + thd->send_kill_message(); + DBUG_RETURN(1); + } + + // read row tag and save values into tag list + if (read_info.read_xml()) + break; + + List_iterator_fast<XML_TAG> xmlit(read_info.taglist); + xmlit.rewind(); + XML_TAG *tag= NULL; + +#ifndef DBUG_OFF + DBUG_PRINT("read_xml_field", ("skip_lines=%d", (int) skip_lines)); + while ((tag= xmlit++)) + { + DBUG_PRINT("read_xml_field", ("got tag:%i '%s' '%s'", + tag->level, tag->field.c_ptr(), + tag->value.c_ptr())); + } +#endif + + restore_record(table, s->default_values); + + while ((item= it++)) + { + /* If this line is to be skipped we don't want to fill field or var */ + if (skip_lines) + continue; + + /* find field in tag list */ + xmlit.rewind(); + tag= xmlit++; + + while(tag && strcmp(tag->field.c_ptr(), item->name) != 0) + tag= xmlit++; + + if (!tag) // found null + { + if (item->type() == Item::FIELD_ITEM) + { + Field *field= ((Item_field *) item)->field; + field->reset(); + field->set_null(); + if (field == table->next_number_field) + table->auto_increment_field_not_null= TRUE; + if (!field->maybe_null()) + { + if (field->type() == FIELD_TYPE_TIMESTAMP) + ((Field_timestamp *) field)->set_time(); + else if (field != table->next_number_field) + field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, + ER_WARN_NULL_TO_NOTNULL, 1); + } + } + else + ((Item_user_var_as_out_param *) item)->set_null_value(cs); + continue; + } + + if (item->type() == Item::FIELD_ITEM) + { + + Field *field= ((Item_field *)item)->field; + field->set_notnull(); + if (field == table->next_number_field) + table->auto_increment_field_not_null= TRUE; + field->store((char *) tag->value.ptr(), tag->value.length(), cs); + } + else + ((Item_user_var_as_out_param *) item)->set_value( + (char *) tag->value.ptr(), + tag->value.length(), cs); + } + + if (read_info.error) + break; + + if (skip_lines) + { + skip_lines--; + continue; + } + + if (item) + { + /* Have not read any field, thus input file is simply ended */ + if (item == fields_vars.head()) + break; + + for ( ; item; item= it++) + { + if (item->type() == Item::FIELD_ITEM) + { + /* + QQ: We probably should not throw warning for each field. + But how about intention to always have the same number + of warnings in THD::cuted_fields (and get rid of cuted_fields + in the end ?) + */ + thd->cuted_fields++; + push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, + ER_WARN_TOO_FEW_RECORDS, + ER(ER_WARN_TOO_FEW_RECORDS), + thd->warning_info->current_row_for_warning()); + } + else + ((Item_user_var_as_out_param *)item)->set_null_value(cs); + } + } + + if (thd->killed || + fill_record_n_invoke_before_triggers(thd, set_fields, set_values, + ignore_check_option_errors, + table->triggers, + TRG_EVENT_INSERT)) + DBUG_RETURN(1); + + switch (table_list->view_check_option(thd, + ignore_check_option_errors)) { + case VIEW_CHECK_SKIP: + read_info.next_line(); + goto continue_loop; + case VIEW_CHECK_ERROR: + DBUG_RETURN(-1); + } + + if (write_record(thd, table, &info)) + DBUG_RETURN(1); + + /* + We don't need to reset auto-increment field since we are restoring + its default value at the beginning of each loop iteration. + */ + thd->transaction.stmt.modified_non_trans_table= no_trans_update_stmt; + thd->warning_info->inc_current_row_for_warning(); + continue_loop:; + } + DBUG_RETURN(test(read_info.error) || thd->is_error()); +} /* load xml end */ + + /* Unescape all escape characters, mark \N as null */ char @@ -955,6 +1157,7 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs, field_term_length= field_term.length(); line_term_ptr=(char*) line_term.ptr(); line_term_length= line_term.length(); + level= 0; /* for load xml */ if (line_start.length() == 0) { line_start_ptr=0; @@ -1030,6 +1233,10 @@ READ_INFO::~READ_INFO() my_free((uchar*) buffer,MYF(0)); error=1; } + List_iterator<XML_TAG> xmlit(taglist); + XML_TAG *t; + while ((t= xmlit++)) + delete(t); } @@ -1362,3 +1569,319 @@ bool READ_INFO::find_start_of_fields() } return 0; } + + +/* + Clear taglist from tags with a specified level +*/ +int READ_INFO::clear_level(int level) +{ + DBUG_ENTER("READ_INFO::read_xml clear_level"); + List_iterator<XML_TAG> xmlit(taglist); + xmlit.rewind(); + XML_TAG *tag; + + while ((tag= xmlit++)) + { + if(tag->level >= level) + { + xmlit.remove(); + delete tag; + } + } + DBUG_RETURN(0); +} + + +/* + Convert an XML entity to Unicode value. + Return -1 on error; +*/ +static int +my_xml_entity_to_char(const char *name, uint length) +{ + if (length == 2) + { + if (!memcmp(name, "gt", length)) + return '>'; + if (!memcmp(name, "lt", length)) + return '<'; + } + else if (length == 3) + { + if (!memcmp(name, "amp", length)) + return '&'; + } + else if (length == 4) + { + if (!memcmp(name, "quot", length)) + return '"'; + if (!memcmp(name, "apos", length)) + return '\''; + } + return -1; +} + + +/** + @brief Convert newline, linefeed, tab to space + + @param chr character + + @details According to the "XML 1.0" standard, + only space (#x20) characters, carriage returns, + line feeds or tabs are considered as spaces. + Convert all of them to space (#x20) for parsing simplicity. +*/ +static int +my_tospace(int chr) +{ + return (chr == '\t' || chr == '\r' || chr == '\n') ? ' ' : chr; +} + + +/* + Read an xml value: handle multibyte and xml escape +*/ +int READ_INFO::read_value(int delim, String *val) +{ + int chr; + String tmp; + + for (chr= GET; my_tospace(chr) != delim && chr != my_b_EOF;) + { +#ifdef USE_MB + if (my_mbcharlen(read_charset, chr) > 1) + { + DBUG_PRINT("read_xml",("multi byte")); + int i, ml= my_mbcharlen(read_charset, chr); + for (i= 1; i < ml; i++) + { + val->append(chr); + /* + Don't use my_tospace() in the middle of a multi-byte character + TODO: check that the multi-byte sequence is valid. + */ + chr= GET; + if (chr == my_b_EOF) + return chr; + } + } +#endif + if(chr == '&') + { + tmp.length(0); + for (chr= my_tospace(GET) ; chr != ';' ; chr= my_tospace(GET)) + { + if (chr == my_b_EOF) + return chr; + tmp.append(chr); + } + if ((chr= my_xml_entity_to_char(tmp.ptr(), tmp.length())) >= 0) + val->append(chr); + else + { + val->append('&'); + val->append(tmp); + val->append(';'); + } + } + else + val->append(chr); + chr= GET; + } + return my_tospace(chr); +} + + +/* + Read a record in xml format + tags and attributes are stored in taglist + when tag set in ROWS IDENTIFIED BY is closed, we are ready and return +*/ +int READ_INFO::read_xml() +{ + DBUG_ENTER("READ_INFO::read_xml"); + int chr, chr2, chr3; + int delim= 0; + String tag, attribute, value; + bool in_tag= false; + + tag.length(0); + attribute.length(0); + value.length(0); + + for (chr= my_tospace(GET); chr != my_b_EOF ; ) + { + switch(chr){ + case '<': /* read tag */ + /* TODO: check if this is a comment <!-- comment --> */ + chr= my_tospace(GET); + if(chr == '!') + { + chr2= GET; + chr3= GET; + + if(chr2 == '-' && chr3 == '-') + { + chr2= 0; + chr3= 0; + chr= my_tospace(GET); + + while(chr != '>' || chr2 != '-' || chr3 != '-') + { + if(chr == '-') + { + chr3= chr2; + chr2= chr; + } + else if (chr2 == '-') + { + chr2= 0; + chr3= 0; + } + chr= my_tospace(GET); + if (chr == my_b_EOF) + goto found_eof; + } + break; + } + } + + tag.length(0); + while(chr != '>' && chr != ' ' && chr != '/' && chr != my_b_EOF) + { + if(chr != delim) /* fix for the '<field name =' format */ + tag.append(chr); + chr= my_tospace(GET); + } + + // row tag should be in ROWS IDENTIFIED BY '<row>' - stored in line_term + if((tag.length() == line_term_length -2) && + (strncmp(tag.c_ptr_safe(), line_term_ptr + 1, tag.length()) == 0)) + { + DBUG_PRINT("read_xml", ("start-of-row: %i %s %s", + level,tag.c_ptr_safe(), line_term_ptr)); + } + + if(chr == ' ' || chr == '>') + { + level++; + clear_level(level + 1); + } + + if (chr == ' ') + in_tag= true; + else + in_tag= false; + break; + + case ' ': /* read attribute */ + while(chr == ' ') /* skip blanks */ + chr= my_tospace(GET); + + if(!in_tag) + break; + + while(chr != '=' && chr != '/' && chr != '>' && chr != my_b_EOF) + { + attribute.append(chr); + chr= my_tospace(GET); + } + break; + + case '>': /* end tag - read tag value */ + in_tag= false; + chr= read_value('<', &value); + if(chr == my_b_EOF) + goto found_eof; + + /* save value to list */ + if(tag.length() > 0 && value.length() > 0) + { + DBUG_PRINT("read_xml", ("lev:%i tag:%s val:%s", + level,tag.c_ptr_safe(), value.c_ptr_safe())); + taglist.push_front( new XML_TAG(level, tag, value)); + } + tag.length(0); + value.length(0); + attribute.length(0); + break; + + case '/': /* close tag */ + level--; + chr= my_tospace(GET); + if(chr != '>') /* if this is an empty tag <tag /> */ + tag.length(0); /* we should keep tag value */ + while(chr != '>' && chr != my_b_EOF) + { + tag.append(chr); + chr= my_tospace(GET); + } + + if((tag.length() == line_term_length -2) && + (strncmp(tag.c_ptr_safe(), line_term_ptr + 1, tag.length()) == 0)) + { + DBUG_PRINT("read_xml", ("found end-of-row %i %s", + level, tag.c_ptr_safe())); + DBUG_RETURN(0); //normal return + } + chr= my_tospace(GET); + break; + + case '=': /* attribute name end - read the value */ + //check for tag field and attribute name + if(!memcmp(tag.c_ptr_safe(), STRING_WITH_LEN("field")) && + !memcmp(attribute.c_ptr_safe(), STRING_WITH_LEN("name"))) + { + /* + this is format <field name="xx">xx</field> + where actual fieldname is in attribute + */ + delim= my_tospace(GET); + tag.length(0); + attribute.length(0); + chr= '<'; /* we pretend that it is a tag */ + level--; + break; + } + + //check for " or ' + chr= GET; + if (chr == my_b_EOF) + goto found_eof; + if(chr == '"' || chr == '\'') + { + delim= chr; + } + else + { + delim= ' '; /* no delimiter, use space */ + PUSH(chr); + } + + chr= read_value(delim, &value); + if(attribute.length() > 0 && value.length() > 0) + { + DBUG_PRINT("read_xml", ("lev:%i att:%s val:%s\n", + level + 1, + attribute.c_ptr_safe(), + value.c_ptr_safe())); + taglist.push_front(new XML_TAG(level + 1, attribute, value)); + } + attribute.length(0); + value.length(0); + if (chr != ' ') + chr= my_tospace(GET); + break; + + default: + chr= my_tospace(GET); + } /* end switch */ + } /* end while */ + +found_eof: + DBUG_PRINT("read_xml",("Found eof")); + eof= 1; + DBUG_RETURN(1); +} diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 5a594b4a06a..3b828b69b0c 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -508,6 +508,7 @@ Item* handle_sql2003_note184_exception(THD *thd, Item* left, bool equal, sp_head *sphead; struct p_elem_val *p_elem_value; enum index_hint_type index_hint; + enum enum_filetype filetype; Diag_condition_item_name diag_condition_item_name; } @@ -1116,6 +1117,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token WRITE_SYM /* SQL-2003-N */ %token X509_SYM %token XA_SYM +%token XML_SYM %token XOR %token YEAR_MONTH_SYM %token YEAR_SYM /* SQL-2003-R */ @@ -1309,7 +1311,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); prepare prepare_src execute deallocate statement sp_suid sp_c_chistics sp_a_chistics sp_chistic sp_c_chistic xa - load_data opt_field_or_var_spec fields_or_vars opt_load_data_set_spec + opt_field_or_var_spec fields_or_vars opt_load_data_set_spec view_replace_or_algorithm view_replace view_algorithm view_or_trigger_or_sp_or_event definer_tail no_definer_tail @@ -1338,6 +1340,7 @@ END_OF_INPUT %type <spname> sp_name %type <index_hint> index_hint_type %type <num> index_hint_clause +%type <filetype> data_or_xml %type <NONE> signal_stmt resignal_stmt %type <diag_condition_item_name> signal_condition_information_item_name @@ -10663,7 +10666,7 @@ use: /* import, export of files */ load: - LOAD DATA_SYM + LOAD data_or_xml { THD *thd= YYTHD; LEX *lex= thd->lex; @@ -10671,39 +10674,21 @@ load: if (lex->sphead) { - my_error(ER_SP_BADSTATEMENT, MYF(0), "LOAD DATA"); + my_error(ER_SP_BADSTATEMENT, MYF(0), + $2 == FILETYPE_CSV ? "LOAD DATA" : "LOAD XML"); MYSQL_YYABORT; } lex->fname_start= lip->get_ptr(); } - load_data - {} - | LOAD TABLE_SYM table_ident FROM MASTER_SYM - { - LEX *lex=Lex; - WARN_DEPRECATED(yythd, "6.0", "LOAD TABLE FROM MASTER", - "MySQL Administrator (mysqldump, mysql)"); - if (lex->sphead) - { - my_error(ER_SP_BADSTATEMENT, MYF(0), "LOAD TABLE"); - MYSQL_YYABORT; - } - lex->sql_command = SQLCOM_LOAD_MASTER_TABLE; - if (!Select->add_table_to_list(YYTHD, $3, NULL, TL_OPTION_UPDATING)) - MYSQL_YYABORT; - } - ; - -load_data: load_data_lock opt_local INFILE TEXT_STRING_filesystem { LEX *lex=Lex; lex->sql_command= SQLCOM_LOAD; - lex->lock_option= $1; - lex->local_file= $2; + lex->lock_option= $4; + lex->local_file= $5; lex->duplicates= DUP_ERROR; lex->ignore= 0; - if (!(lex->exchange= new sql_exchange($4.str, 0))) + if (!(lex->exchange= new sql_exchange($7.str, 0, $2))) MYSQL_YYABORT; } opt_duplicate INTO @@ -10713,7 +10698,7 @@ load_data: TABLE_SYM table_ident { LEX *lex=Lex; - if (!Select->add_table_to_list(YYTHD, $10, NULL, TL_OPTION_UPDATING, + if (!Select->add_table_to_list(YYTHD, $13, NULL, TL_OPTION_UPDATING, lex->lock_option)) MYSQL_YYABORT; lex->field_list.empty(); @@ -10721,12 +10706,37 @@ load_data: lex->value_list.empty(); } opt_load_data_charset - { Lex->exchange->cs= $12; } + { Lex->exchange->cs= $15; } + opt_xml_rows_identified_by opt_field_term opt_line_term opt_ignore_lines opt_field_or_var_spec opt_load_data_set_spec {} - | FROM MASTER_SYM + | LOAD TABLE_SYM table_ident FROM MASTER_SYM { + LEX *lex=Lex; + WARN_DEPRECATED(yythd, "6.0", "LOAD TABLE FROM MASTER", + "MySQL Administrator (mysqldump, mysql)"); + if (lex->sphead) + { + my_error(ER_SP_BADSTATEMENT, MYF(0), "LOAD TABLE"); + MYSQL_YYABORT; + } + lex->sql_command = SQLCOM_LOAD_MASTER_TABLE; + if (!Select->add_table_to_list(YYTHD, $3, NULL, TL_OPTION_UPDATING)) + MYSQL_YYABORT; + } + | LOAD DATA_SYM FROM MASTER_SYM + { + THD *thd= YYTHD; + LEX *lex= thd->lex; + Lex_input_stream *lip= YYLIP; + + if (lex->sphead) + { + my_error(ER_SP_BADSTATEMENT, MYF(0), "LOAD DATA"); + MYSQL_YYABORT; + } + lex->fname_start= lip->get_ptr(); Lex->sql_command = SQLCOM_LOAD_MASTER_DATA; WARN_DEPRECATED(yythd, "6.0", "LOAD DATA FROM MASTER", "mysqldump or future " @@ -10734,6 +10744,11 @@ load_data: } ; +data_or_xml: + DATA_SYM { $$= FILETYPE_CSV; } + | XML_SYM { $$= FILETYPE_XML; } + ; + opt_local: /* empty */ { $$=0;} | LOCAL_SYM { $$=1;} @@ -10820,15 +10835,25 @@ line_term: } ; +opt_xml_rows_identified_by: + /* empty */ { } + | ROWS_SYM IDENTIFIED_SYM BY text_string + { Lex->exchange->line_term = $4; }; + opt_ignore_lines: /* empty */ - | IGNORE_SYM NUM LINES + | IGNORE_SYM NUM lines_or_rows { DBUG_ASSERT(Lex->exchange != 0); Lex->exchange->skip_lines= atol($2.str); } ; +lines_or_rows: + LINES { } + | ROWS_SYM { } + ; + opt_field_or_var_spec: /* empty */ {} | '(' fields_or_vars ')' {} @@ -11924,6 +11949,7 @@ keyword_sp: | WEEK_SYM {} | WORK_SYM {} | X509_SYM {} + | XML_SYM {} | YEAR_SYM {} ; |