summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mysql.com>2009-10-12 11:22:53 +0500
committerAlexander Barkov <bar@mysql.com>2009-10-12 11:22:53 +0500
commit93c885bd36569b5b2bd7c85e497ecf9830e848de (patch)
tree03278b18508c3d68f40753e2ad54f2f6508d63e2
parent829525756e7a769b0c0de716f6207f5f21daa994 (diff)
downloadmariadb-git-93c885bd36569b5b2bd7c85e497ecf9830e848de.tar.gz
WL#1397 convert XML -> SQL
-rw-r--r--mysql-test/r/loadxml.result79
-rw-r--r--mysql-test/std_data/loadxml.dat64
-rw-r--r--mysql-test/std_data/loadxml2.dat19
-rw-r--r--mysql-test/t/loadxml.test106
-rw-r--r--sql/lex.h1
-rw-r--r--sql/sql_class.cc8
-rw-r--r--sql/sql_class.h5
-rw-r--r--sql/sql_load.cc527
-rw-r--r--sql/sql_yacc.yy84
9 files changed, 859 insertions, 34 deletions
diff --git a/mysql-test/r/loadxml.result b/mysql-test/r/loadxml.result
new file mode 100644
index 00000000000..dc2392e5593
--- /dev/null
+++ b/mysql-test/r/loadxml.result
@@ -0,0 +1,79 @@
+drop table if exists t1, t2;
+create table t1 (a int, b varchar(64));
+-- Load a static XML file
+load xml infile '../../std_data/loadxml.dat' into table t1
+rows identified by '<row>';
+select * from t1 order by a;
+a b
+1 b1
+2 b2
+3 b3
+11 b11
+111 b111
+112 b112 & < > " ' &unknown; -- check entities
+212 b212
+213 b213
+214 b214
+215 b215
+216 &bb b;
+delete from t1;
+-- Load a static XML file with 'IGNORE num ROWS'
+load xml infile '../../std_data/loadxml.dat' into table t1
+rows identified by '<row>' ignore 4 rows;
+select * from t1 order by a;
+a b
+111 b111
+112 b112 & < > " ' &unknown; -- check entities
+212 b212
+213 b213
+214 b214
+215 b215
+216 &bb b;
+-- Check 'mysqldump --xml' + 'LOAD XML' round trip
+delete from t1;
+load xml infile 'MYSQLTEST_VARDIR/tmp/loadxml-dump.xml' into table t1 rows identified by '<row>';;
+select * from t1 order by a;
+a b
+111 b111
+112 b112 & < > " ' &unknown; -- check entities
+212 b212
+213 b213
+214 b214
+215 b215
+216 &bb b;
+--Check that default row tag is '<row>
+delete from t1;
+load xml infile 'MYSQLTEST_VARDIR/tmp/loadxml-dump.xml' into table t1;;
+select * from t1 order by a;
+a b
+111 b111
+112 b112 & < > " ' &unknown; -- check entities
+212 b212
+213 b213
+214 b214
+215 b215
+216 &bb b;
+-- Check that 'xml' is not a keyword
+select 1 as xml;
+xml
+1
+create table t2(fl text);
+LOAD XML LOCAL INFILE "$MYSQLTEST_VARDIR/tmp/loadxml-dump.xml" INTO TABLE t2 ROWS IDENTIFIED BY '<person>';;
+show processlist;
+Id User Host db Command Time State Info
+2 root localhost test Query 0 NULL show processlist
+5 root localhost test Query 3 Reading from net LOAD XML LOCAL INFILE "$MYSQLTEST_VARDIR/tmp/loadxml-dump.xml" INTO TABLE t2 ROWS IDENTIFIED BY '<p
+drop table t1;
+drop table t2;
+create table t1 (
+id int(11) not null,
+text text,
+primary key (id)
+) engine=MyISAM default charset=latin1;
+load xml infile '../../std_data/loadxml2.dat' into table t1;
+select * from t1;
+id text
+1 line1
+line2
+line3
+drop table t1;
diff --git a/mysql-test/std_data/loadxml.dat b/mysql-test/std_data/loadxml.dat
new file mode 100644
index 00000000000..b72fac9da74
--- /dev/null
+++ b/mysql-test/std_data/loadxml.dat
@@ -0,0 +1,64 @@
+<?xml version="1.0"?>
+<mysqldump xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+<database name="test">
+ <table_structure name="t1">
+ <field Field="a" Type="int(11)" Null="YES" Key="" Extra="" />
+ <field Field="b" Type="varchar(128)" Null="YES" Key="" Extra="" />
+ <options Name="t1" Engine="MyISAM" Version="10" Row_format="Dynamic" Rows="3" Avg_row_length="20" Data_length="60" Max_data_length="281474976710655" Index_length="1024" Data_free="0" Create_time="2007-02-09 09:08:36" Update_time="2007-02-09 09:08:54" Collation="latin1_swedish_ci" Create_options="" Comment="" />
+ </table_structure>
+ <table_data name="t1">
+ <row>
+ <field name="a">1</field>
+ <field name="b">b1</field>
+ </row>
+ <row>
+ <field name="a">2</field>
+ <field name="b">b2</field>
+ </row>
+ <row>
+ <field name="a">3</field>
+ <field name="b">b3</field>
+ </row>
+ <row>
+ <field name="a">11</field>
+ <field name="b">b11</field>
+ </row>
+
+ <!-- Check field values as tags -->
+ <row>
+ <a>111</a>
+ <b>b111</b>
+ </row>
+
+ <row>
+ <a>112</a>
+ <b>b112 &amp; &lt; &gt; &quot; &apos; &unknown; -- check entities</b>
+ </row>
+
+
+ <!-- Check field values in attributes -->
+ <row a=212 b="b212"></row>
+
+ <!-- Bug#29752 Linefeeds break LOAD XML INFILE -->
+ <!-- Check various combinations of TAB and NL -->
+
+ <row
+ a=213 b="b213">
+ </row>
+
+ <row
+ a=214
+ b="b214">
+ </row>
+
+ <row a=215 b="b215"></row>
+
+ <row a=216 b="&bb
+b;"></row>
+
+ <!-- End of bug#29752 -->
+
+ </table_data>
+</database>
+</mysqldump>
+
diff --git a/mysql-test/std_data/loadxml2.dat b/mysql-test/std_data/loadxml2.dat
new file mode 100644
index 00000000000..f0d966cf9b6
--- /dev/null
+++ b/mysql-test/std_data/loadxml2.dat
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<mysqldump xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+<database name="test_of_xml_import">
+ <table_structure name="t1">
+ <field Field="id" Type="int(11)" Null="NO" Key="PRI" Extra="" />
+ <field Field="text" Type="text" Null="YES" Key="" Extra="" />
+ <key Table="t1" Non_unique="0" Key_name="PRIMARY" Seq_in_index="1" Column_name="id" Collation="A" Cardinality="1" Null="" Index_type="BTREE" Comment="" Index_Comment="" />
+ <options Name="t1" Engine="MyISAM" Version="10" Row_format="Dynamic" Rows="1" Avg_row_length="32" Data_length="32" Max_data_length="281474976710655" Index_length="2048" Data_free="0" Create_time="2009-06-18 10:02:37" Update_time="2009-06-18 10:02:43" Collation="latin1_swedish_ci" Create_options="" Comment="" />
+ </table_structure>
+ <table_data name="t1">
+ <row>
+ <field name="id">1</field>
+ <field name="text">line1
+line2
+line3</field>
+ </row>
+ </table_data>
+</database>
+</mysqldump>
diff --git a/mysql-test/t/loadxml.test b/mysql-test/t/loadxml.test
new file mode 100644
index 00000000000..bb088559ae2
--- /dev/null
+++ b/mysql-test/t/loadxml.test
@@ -0,0 +1,106 @@
+#
+# Tests for "LOAD XML" - a contributed patch from Erik Wetterberg.
+#
+
+# Running the $MYSQL_DUMP tool against an embedded server does not work.
+--source include/not_embedded.inc
+
+--disable_warnings
+drop table if exists t1, t2;
+--enable_warnings
+
+create table t1 (a int, b varchar(64));
+
+
+--echo -- Load a static XML file
+load xml infile '../../std_data/loadxml.dat' into table t1
+rows identified by '<row>';
+select * from t1 order by a;
+delete from t1;
+
+
+--echo -- Load a static XML file with 'IGNORE num ROWS'
+load xml infile '../../std_data/loadxml.dat' into table t1
+rows identified by '<row>' ignore 4 rows;
+select * from t1 order by a;
+
+
+--echo -- Check 'mysqldump --xml' + 'LOAD XML' round trip
+--exec $MYSQL_DUMP --xml test t1 > "$MYSQLTEST_VARDIR/tmp/loadxml-dump.xml" 2>&1
+delete from t1;
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval load xml infile '$MYSQLTEST_VARDIR/tmp/loadxml-dump.xml' into table t1 rows identified by '<row>';
+select * from t1 order by a;
+
+--echo --Check that default row tag is '<row>
+delete from t1;
+--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
+--eval load xml infile '$MYSQLTEST_VARDIR/tmp/loadxml-dump.xml' into table t1;
+select * from t1 order by a;
+
+--echo -- Check that 'xml' is not a keyword
+select 1 as xml;
+
+
+#
+# Bug #42520 killing load .. infile Assertion failed: ! is_set(), file .\sql_error.cc, line 8
+#
+
+--disable_query_log
+delete from t1;
+insert into t1 values (1, '12345678900987654321'), (2, 'asdfghjkl;asdfghjkl;');
+insert into t1 select * from t1;
+insert into t1 select * from t1;
+insert into t1 select * from t1;
+insert into t1 select * from t1;
+insert into t1 select * from t1;
+insert into t1 select * from t1;
+insert into t1 select * from t1;
+insert into t1 select * from t1;
+insert into t1 select * from t1;
+insert into t1 select * from t1;
+insert into t1 select * from t1;
+insert into t1 select * from t1;
+insert into t1 select * from t1;
+--exec $MYSQL_DUMP --xml test t1 > "$MYSQLTEST_VARDIR/tmp/loadxml-dump.xml" 2>&1
+--enable_query_log
+
+connect (addconroot, localhost, root,,);
+connection addconroot;
+create table t2(fl text);
+--let $PSEUDO_THREAD_ID=`select @@pseudo_thread_id `
+
+--send LOAD XML LOCAL INFILE "$MYSQLTEST_VARDIR/tmp/loadxml-dump.xml" INTO TABLE t2 ROWS IDENTIFIED BY '<person>';
+
+sleep 3;
+
+
+connection default;
+show processlist;
+
+--disable_query_log
+--eval kill $PSEUDO_THREAD_ID
+--enable_query_log
+
+disconnect addconroot;
+
+#
+# Clean up
+#
+remove_file $MYSQLTEST_VARDIR/tmp/loadxml-dump.xml;
+drop table t1;
+drop table t2;
+
+#
+# Bug #36750 LOAD XML doesn't understand new line (feed) characters in multi line text fields
+#
+
+create table t1 (
+ id int(11) not null,
+ text text,
+ primary key (id)
+) engine=MyISAM default charset=latin1;
+load xml infile '../../std_data/loadxml2.dat' into table t1;
+select * from t1;
+drop table t1;
+
diff --git a/sql/lex.h b/sql/lex.h
index 268c77a4f98..b4945bbfd1a 100644
--- a/sql/lex.h
+++ b/sql/lex.h
@@ -607,6 +607,7 @@ static SYMBOL symbols[] = {
{ "X509", SYM(X509_SYM)},
{ "XOR", SYM(XOR)},
{ "XA", SYM(XA_SYM)},
+ { "XML", SYM(XML_SYM)}, /* LOAD XML Arnold/Erik */
{ "YEAR", SYM(YEAR_SYM)},
{ "YEAR_MONTH", SYM(YEAR_MONTH_SYM)},
{ "ZEROFILL", SYM(ZEROFILL)},
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 01a1fff3048..11692c2fd9f 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -1602,13 +1602,17 @@ bool select_result::check_simple_select() const
static String default_line_term("\n",default_charset_info);
static String default_escaped("\\",default_charset_info);
static String default_field_term("\t",default_charset_info);
+static String default_xml_row_term("<row>", default_charset_info);
-sql_exchange::sql_exchange(char *name,bool flag)
+sql_exchange::sql_exchange(char *name, bool flag,
+ enum enum_filetype filetype_arg)
:file_name(name), opt_enclosed(0), dumpfile(flag), skip_lines(0)
{
+ filetype= filetype_arg;
field_term= &default_field_term;
enclosed= line_start= &my_empty_string;
- line_term= &default_line_term;
+ line_term= filetype == FILETYPE_CSV ?
+ &default_line_term : &default_xml_row_term;
escaped= &default_escaped;
cs= NULL;
}
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 4874801e380..5dcf3fdaad0 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -89,6 +89,7 @@ enum enum_slave_exec_mode { SLAVE_EXEC_MODE_STRICT,
SLAVE_EXEC_MODE_LAST_BIT};
enum enum_mark_columns
{ MARK_COLUMNS_NONE, MARK_COLUMNS_READ, MARK_COLUMNS_WRITE};
+enum enum_filetype { FILETYPE_CSV, FILETYPE_XML };
extern char internal_table_name[2];
extern char empty_c_string[1];
@@ -2364,13 +2365,15 @@ my_eof(THD *thd)
class sql_exchange :public Sql_alloc
{
public:
+ enum enum_filetype filetype; /* load XML, Added by Arnold & Erik */
char *file_name;
String *field_term,*enclosed,*line_term,*line_start,*escaped;
bool opt_enclosed;
bool dumpfile;
ulong skip_lines;
CHARSET_INFO *cs;
- sql_exchange(char *name,bool dumpfile_flag);
+ sql_exchange(char *name, bool dumpfile_flag,
+ enum_filetype filetype_arg= FILETYPE_CSV);
bool escaped_given(void);
};
diff --git a/sql/sql_load.cc b/sql/sql_load.cc
index 079b6f2fe43..847417a31ed 100644
--- a/sql/sql_load.cc
+++ b/sql/sql_load.cc
@@ -15,6 +15,8 @@
/* Copy data from a textfile to table */
+/* 2006-12 Erik Wetterberg : LOAD XML added */
+
#include "mysql_priv.h"
#include <my_dir.h>
#include <m_ctype.h>
@@ -23,6 +25,23 @@
#include "sp_head.h"
#include "sql_trigger.h"
+/*
+  One name/value pair collected by READ_INFO::read_xml() while parsing a row:
+  either an element with its text content or an attribute with its value.
+*/
+class XML_TAG {
+public:
+  int level;      /* nesting depth at which the pair was found */
+  String field;   /* element or attribute name */
+  String value;   /* text content or attribute value */
+  XML_TAG(int level_arg, String field_arg, String value_arg);
+};
+
+
+/*
+  Build a tag entry; the name and the value are appended into the
+  object's own String members.
+*/
+XML_TAG::XML_TAG(int level_arg, String field_arg, String value_arg)
+  :level(level_arg)
+{
+  field.append(field_arg);
+  value.append(value_arg);
+}
+
+
class READ_INFO {
File file;
uchar *buffer, /* Buffer for read text */
@@ -37,6 +56,7 @@ class READ_INFO {
bool need_end_io_cache;
IO_CACHE cache;
NET *io_net;
+ int level; /* for load xml */
public:
bool error,line_cuted,found_null,enclosed;
@@ -54,6 +74,12 @@ public:
char unescape(char chr);
int terminator(char *ptr,uint length);
bool find_start_of_fields();
+ /* load xml */
+ List<XML_TAG> taglist;
+ int read_value(int delim, String *val);
+ int read_xml();
+ int clear_level(int level);
+
/*
We need to force cache close before destructor is invoked to log
the last read block
@@ -82,6 +108,13 @@ static int read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
List<Item> &set_values, READ_INFO &read_info,
String &enclosed, ulong skip_lines,
bool ignore_check_option_errors);
+
+static int read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
+ List<Item> &fields_vars, List<Item> &set_fields,
+ List<Item> &set_values, READ_INFO &read_info,
+ String &enclosed, ulong skip_lines,
+ bool ignore_check_option_errors);
+
#ifndef EMBEDDED_LIBRARY
static bool write_execute_load_query_log_event(THD *thd,
bool duplicates, bool ignore,
@@ -390,7 +423,7 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
thd->count_cuted_fields= CHECK_FIELD_WARN; /* calc cuted fields */
thd->cuted_fields=0L;
/* Skip lines if there is a line terminator */
- if (ex->line_term->length())
+ if (ex->line_term->length() && ex->filetype != FILETYPE_XML)
{
/* ex->skip_lines needs to be preserved for logging */
while (skip_lines > 0)
@@ -421,7 +454,11 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list,
(MODE_STRICT_TRANS_TABLES |
MODE_STRICT_ALL_TABLES)));
- if (!field_term->length() && !enclosed->length())
+ if (ex->filetype == FILETYPE_XML) /* load xml */
+ error= read_xml_field(thd, info, table_list, fields_vars,
+ set_fields, set_values, read_info,
+ *(ex->line_term), skip_lines, ignore);
+ else if (!field_term->length() && !enclosed->length())
error= read_fixed_length(thd, info, table_list, fields_vars,
set_fields, set_values, read_info,
skip_lines, ignore);
@@ -917,6 +954,171 @@ continue_loop:;
}
+/****************************************************************************
+** Read rows in xml format
+**
+** Repeatedly asks read_info.read_xml() for the next row element, copies the
+** collected tag/attribute values into the columns and user variables listed
+** in fields_vars (matched by name), runs the BEFORE INSERT triggers and the
+** view CHECK OPTION, and writes the record.
+**
+** A column or variable with no matching tag in the row is set to NULL.
+** The first skip_lines rows are parsed but not stored.
+** row_tag is not referenced here; the row tag is taken from read_info.
+**
+** Returns 1 if the statement was killed, or if filling or writing a record
+** failed; -1 if the view check reported an error; otherwise non-zero when
+** read_info.error is set or an error was raised in thd, and 0 on success.
+****************************************************************************/
+static int
+read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
+               List<Item> &fields_vars, List<Item> &set_fields,
+               List<Item> &set_values, READ_INFO &read_info,
+               String &row_tag, ulong skip_lines,
+               bool ignore_check_option_errors)
+{
+  List_iterator_fast<Item> it(fields_vars);
+  Item *item;
+  TABLE *table= table_list->table;
+  bool no_trans_update_stmt;
+  CHARSET_INFO *cs= read_info.read_charset;
+  DBUG_ENTER("read_xml_field");
+
+  no_trans_update_stmt= !table->file->has_transactions();
+
+  for ( ; ; it.rewind())
+  {
+    if (thd->killed)
+    {
+      thd->send_kill_message();
+      DBUG_RETURN(1);
+    }
+
+    // read row tag and save values into tag list
+    if (read_info.read_xml())
+      break;
+
+    List_iterator_fast<XML_TAG> xmlit(read_info.taglist);
+    xmlit.rewind();
+    XML_TAG *tag= NULL;
+
+#ifndef DBUG_OFF
+    DBUG_PRINT("read_xml_field", ("skip_lines=%d", (int) skip_lines));
+    while ((tag= xmlit++))
+    {
+      DBUG_PRINT("read_xml_field", ("got tag:%i '%s' '%s'",
+                                    tag->level, tag->field.c_ptr(),
+                                    tag->value.c_ptr()));
+    }
+#endif
+
+    restore_record(table, s->default_values);
+
+    while ((item= it++))
+    {
+      /* If this line is to be skipped we don't want to fill field or var */
+      if (skip_lines)
+        continue;
+
+      /* find field in tag list */
+      xmlit.rewind();
+      tag= xmlit++;
+
+      while(tag && strcmp(tag->field.c_ptr(), item->name) != 0)
+        tag= xmlit++;
+
+      if (!tag) // found null
+      {
+        /* The row has no value for this column/variable: store NULL */
+        if (item->type() == Item::FIELD_ITEM)
+        {
+          Field *field= ((Item_field *) item)->field;
+          field->reset();
+          field->set_null();
+          if (field == table->next_number_field)
+            table->auto_increment_field_not_null= TRUE;
+          if (!field->maybe_null())
+          {
+            /*
+              NOT NULL column: a timestamp gets the current time, the
+              auto-increment column is left alone, anything else warns.
+            */
+            if (field->type() == FIELD_TYPE_TIMESTAMP)
+              ((Field_timestamp *) field)->set_time();
+            else if (field != table->next_number_field)
+              field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN,
+                                 ER_WARN_NULL_TO_NOTNULL, 1);
+          }
+        }
+        else
+          ((Item_user_var_as_out_param *) item)->set_null_value(cs);
+        continue;
+      }
+
+      if (item->type() == Item::FIELD_ITEM)
+      {
+
+        Field *field= ((Item_field *)item)->field;
+        field->set_notnull();
+        if (field == table->next_number_field)
+          table->auto_increment_field_not_null= TRUE;
+        field->store((char *) tag->value.ptr(), tag->value.length(), cs);
+      }
+      else
+        ((Item_user_var_as_out_param *) item)->set_value(
+                                                 (char *) tag->value.ptr(),
+                                                 tag->value.length(), cs);
+    }
+
+    if (read_info.error)
+      break;
+
+    if (skip_lines)
+    {
+      skip_lines--;
+      continue;
+    }
+
+    /*
+      The loop above only ends once every entry of fields_vars has been
+      visited (entries without a matching tag were set to NULL), so there
+      are no "remaining" columns to warn about at this point.
+    */
+
+    if (thd->killed ||
+        fill_record_n_invoke_before_triggers(thd, set_fields, set_values,
+                                             ignore_check_option_errors,
+                                             table->triggers,
+                                             TRG_EVENT_INSERT))
+      DBUG_RETURN(1);
+
+    switch (table_list->view_check_option(thd,
+                                          ignore_check_option_errors)) {
+    case VIEW_CHECK_SKIP:
+      read_info.next_line();
+      goto continue_loop;
+    case VIEW_CHECK_ERROR:
+      DBUG_RETURN(-1);
+    }
+
+    if (write_record(thd, table, &info))
+      DBUG_RETURN(1);
+
+    /*
+      We don't need to reset auto-increment field since we are restoring
+      its default value at the beginning of each loop iteration.
+    */
+    thd->transaction.stmt.modified_non_trans_table= no_trans_update_stmt;
+    thd->warning_info->inc_current_row_for_warning();
+    continue_loop:;
+  }
+  DBUG_RETURN(test(read_info.error) || thd->is_error());
+} /* load xml end */
+
+
/* Unescape all escape characters, mark \N as null */
char
@@ -955,6 +1157,7 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs,
field_term_length= field_term.length();
line_term_ptr=(char*) line_term.ptr();
line_term_length= line_term.length();
+ level= 0; /* for load xml */
if (line_start.length() == 0)
{
line_start_ptr=0;
@@ -1030,6 +1233,10 @@ READ_INFO::~READ_INFO()
my_free((uchar*) buffer,MYF(0));
error=1;
}
+ List_iterator<XML_TAG> xmlit(taglist);
+ XML_TAG *t;
+ while ((t= xmlit++))
+ delete(t);
}
@@ -1362,3 +1569,319 @@ bool READ_INFO::find_start_of_fields()
}
return 0;
}
+
+
+/*
+  Drop from taglist every tag whose level is greater than or equal to
+  the given level, freeing the removed XML_TAG objects.
+  Always returns 0.
+*/
+int READ_INFO::clear_level(int level)
+{
+  DBUG_ENTER("READ_INFO::read_xml clear_level");
+  List_iterator<XML_TAG> tag_it(taglist);
+  XML_TAG *entry;
+
+  tag_it.rewind();
+  for (entry= tag_it++; entry; entry= tag_it++)
+  {
+    if (entry->level < level)
+      continue;                 /* shallower than the cut-off: keep it */
+    tag_it.remove();
+    delete entry;
+  }
+  DBUG_RETURN(0);
+}
+
+
+/*
+  Translate the name of a predefined XML entity (the text between '&'
+  and ';') into the character it stands for.
+
+  Recognized names: gt, lt, amp, quot, apos.
+  Returns the character code, or -1 if the name is not recognized.
+*/
+static int
+my_xml_entity_to_char(const char *name, uint length)
+{
+  switch (length) {
+  case 2:
+    if (!memcmp(name, "gt", 2))
+      return '>';
+    if (!memcmp(name, "lt", 2))
+      return '<';
+    break;
+  case 3:
+    if (!memcmp(name, "amp", 3))
+      return '&';
+    break;
+  case 4:
+    if (!memcmp(name, "quot", 4))
+      return '"';
+    if (!memcmp(name, "apos", 4))
+      return '\'';
+    break;
+  default:
+    break;
+  }
+  return -1;
+}
+
+
+/**
+  @brief Map tab, carriage return and line feed to a plain space
+
+  @param chr character (or any other int value, returned unchanged)
+
+  @return ' ' for '\t', '\r' and '\n'; otherwise chr itself
+
+  @details According to the "XML 1.0" standard,
+  only space (#x20) characters, carriage returns,
+  line feeds or tabs are considered as spaces.
+  All of them are folded to space (#x20) so the parser
+  has a single white space character to test for.
+*/
+static int
+my_tospace(int chr)
+{
+  switch (chr) {
+  case '\t':
+  case '\r':
+  case '\n':
+    return ' ';
+  default:
+    return chr;
+  }
+}
+
+
+/*
+ Read an xml value: handle multibyte and xml escape
+
+ Appends input characters to *val until a character equal to 'delim'
+ (compared after my_tospace() folding, so a ' ' delimiter also stops on
+ tab, CR and LF) or my_b_EOF is read. The terminating character is not
+ appended.
+
+ An "&name;" sequence is replaced by the character returned from
+ my_xml_entity_to_char(); an unrecognized name is copied through as
+ "&name;" with white space inside the name folded to ' '.
+
+ Returns the terminating character (folded by my_tospace()), or my_b_EOF
+ if the input ended first.
+*/
+int READ_INFO::read_value(int delim, String *val)
+{
+ int chr;
+ String tmp; /* entity name being collected */
+
+ for (chr= GET; my_tospace(chr) != delim && chr != my_b_EOF;)
+ {
+#ifdef USE_MB
+ if (my_mbcharlen(read_charset, chr) > 1)
+ {
+ DBUG_PRINT("read_xml",("multi byte"));
+ int i, ml= my_mbcharlen(read_charset, chr);
+ for (i= 1; i < ml; i++)
+ {
+ val->append(chr);
+ /*
+ Don't use my_tospace() in the middle of a multi-byte character
+ TODO: check that the multi-byte sequence is valid.
+ Only the leading bytes are appended in this loop; the last byte
+ of the character is left in chr and handled by the code below.
+ */
+ chr= GET;
+ if (chr == my_b_EOF)
+ return chr;
+ }
+ }
+#endif
+ if(chr == '&')
+ {
+ tmp.length(0);
+ for (chr= my_tospace(GET) ; chr != ';' ; chr= my_tospace(GET)) /* name runs up to ';' */
+ {
+ if (chr == my_b_EOF)
+ return chr;
+ tmp.append(chr);
+ }
+ if ((chr= my_xml_entity_to_char(tmp.ptr(), tmp.length())) >= 0)
+ val->append(chr);
+ else
+ {
+ val->append('&'); /* unknown entity: keep it verbatim */
+ val->append(tmp);
+ val->append(';');
+ }
+ }
+ else
+ val->append(chr);
+ chr= GET;
+ }
+ return my_tospace(chr);
+}
+
+
+/*
+ Read a record in xml format
+ tags and attributes are stored in taglist
+ when tag set in ROWS IDENTIFIED BY is closed, we are ready and return
+
+ The row tag is taken from line_term: the name between its first and last
+ character (the angle brackets) is compared with each tag name read.
+
+ Element values are stored with the current nesting level, attribute
+ values with level + 1. Entries are pushed to the front of taglist, and
+ only when both the name and the value are non-empty.
+
+ Returns 0 when the closing row tag has been read.
+ Returns 1, with eof set to 1, when the end of input is reached.
+*/
+int READ_INFO::read_xml()
+{
+ DBUG_ENTER("READ_INFO::read_xml");
+ int chr, chr2, chr3;
+ int delim= 0; /* quote character of the last attribute value read */
+ String tag, attribute, value;
+ bool in_tag= false; /* true while between '<name ' and the closing '>' */
+
+ tag.length(0);
+ attribute.length(0);
+ value.length(0);
+
+ for (chr= my_tospace(GET); chr != my_b_EOF ; )
+ {
+ switch(chr){
+ case '<': /* read tag */
+ /* Skip over a comment <!-- comment --> if this is one */
+ chr= my_tospace(GET);
+ if(chr == '!')
+ {
+ chr2= GET;
+ chr3= GET;
+
+ if(chr2 == '-' && chr3 == '-')
+ {
+ chr2= 0;
+ chr3= 0;
+ chr= my_tospace(GET);
+
+ while(chr != '>' || chr2 != '-' || chr3 != '-') /* chr2/chr3 are '-' when the two previous characters were dashes */
+ {
+ if(chr == '-')
+ {
+ chr3= chr2;
+ chr2= chr;
+ }
+ else if (chr2 == '-')
+ {
+ chr2= 0;
+ chr3= 0;
+ }
+ chr= my_tospace(GET);
+ if (chr == my_b_EOF)
+ goto found_eof;
+ }
+ break;
+ }
+ } /* NOTE(review): for "<!" not followed by "--" the two characters read into chr2/chr3 are consumed and not put back */
+
+ tag.length(0);
+ while(chr != '>' && chr != ' ' && chr != '/' && chr != my_b_EOF)
+ {
+ if(chr != delim) /* fix for the '<field name =' format */
+ tag.append(chr);
+ chr= my_tospace(GET);
+ }
+
+ // row tag should be in ROWS IDENTIFIED BY '<row>' - stored in line_term
+ if((tag.length() == line_term_length -2) &&
+ (strncmp(tag.c_ptr_safe(), line_term_ptr + 1, tag.length()) == 0))
+ {
+ DBUG_PRINT("read_xml", ("start-of-row: %i %s %s",
+ level,tag.c_ptr_safe(), line_term_ptr));
+ }
+
+ if(chr == ' ' || chr == '>') /* an opening tag: go one level deeper */
+ {
+ level++;
+ clear_level(level + 1); /* forget values left over from deeper levels */
+ }
+
+ if (chr == ' ')
+ in_tag= true;
+ else
+ in_tag= false;
+ break;
+
+ case ' ': /* read attribute */
+ while(chr == ' ') /* skip blanks */
+ chr= my_tospace(GET);
+
+ if(!in_tag) /* white space outside a tag is not an attribute */
+ break;
+
+ while(chr != '=' && chr != '/' && chr != '>' && chr != my_b_EOF)
+ {
+ attribute.append(chr);
+ chr= my_tospace(GET);
+ }
+ break;
+
+ case '>': /* end tag - read tag value */
+ in_tag= false;
+ chr= read_value('<', &value);
+ if(chr == my_b_EOF)
+ goto found_eof;
+
+ /* save value to list */
+ if(tag.length() > 0 && value.length() > 0)
+ {
+ DBUG_PRINT("read_xml", ("lev:%i tag:%s val:%s",
+ level,tag.c_ptr_safe(), value.c_ptr_safe()));
+ taglist.push_front( new XML_TAG(level, tag, value));
+ }
+ tag.length(0);
+ value.length(0);
+ attribute.length(0);
+ break;
+
+ case '/': /* close tag */
+ level--;
+ chr= my_tospace(GET);
+ if(chr != '>') /* if this is an empty tag <tag /> */
+ tag.length(0); /* we should keep tag value */
+ while(chr != '>' && chr != my_b_EOF)
+ {
+ tag.append(chr);
+ chr= my_tospace(GET);
+ }
+
+ if((tag.length() == line_term_length -2) &&
+ (strncmp(tag.c_ptr_safe(), line_term_ptr + 1, tag.length()) == 0))
+ {
+ DBUG_PRINT("read_xml", ("found end-of-row %i %s",
+ level, tag.c_ptr_safe()));
+ DBUG_RETURN(0); //normal return
+ }
+ chr= my_tospace(GET);
+ break;
+
+ case '=': /* attribute name end - read the value */
+ //check for tag field and attribute name
+ if(!memcmp(tag.c_ptr_safe(), STRING_WITH_LEN("field")) &&
+ !memcmp(attribute.c_ptr_safe(), STRING_WITH_LEN("name")))
+ {
+ /*
+ this is format <field name="xx">xx</field>
+ where actual fieldname is in attribute
+ */
+ delim= my_tospace(GET); /* the character after '=' (the opening quote in the format above) */
+ tag.length(0);
+ attribute.length(0);
+ chr= '<'; /* we pretend that it is a tag */
+ level--; /* the '<' case will increment level again */
+ break;
+ }
+
+ //check for " or '
+ chr= GET;
+ if (chr == my_b_EOF)
+ goto found_eof;
+ if(chr == '"' || chr == '\'')
+ {
+ delim= chr;
+ }
+ else
+ {
+ delim= ' '; /* no delimiter, use space */
+ PUSH(chr);
+ }
+
+ chr= read_value(delim, &value); /* NOTE(review): my_b_EOF is not checked here, unlike in the '>' case */
+ if(attribute.length() > 0 && value.length() > 0)
+ {
+ DBUG_PRINT("read_xml", ("lev:%i att:%s val:%s\n",
+ level + 1,
+ attribute.c_ptr_safe(),
+ value.c_ptr_safe()));
+ taglist.push_front(new XML_TAG(level + 1, attribute, value));
+ }
+ attribute.length(0);
+ value.length(0);
+ if (chr != ' ')
+ chr= my_tospace(GET);
+ break;
+
+ default:
+ chr= my_tospace(GET);
+ } /* end switch */
+ } /* end while */
+
+found_eof:
+ DBUG_PRINT("read_xml",("Found eof"));
+ eof= 1;
+ DBUG_RETURN(1);
+}
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 5a594b4a06a..3b828b69b0c 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -508,6 +508,7 @@ Item* handle_sql2003_note184_exception(THD *thd, Item* left, bool equal,
sp_head *sphead;
struct p_elem_val *p_elem_value;
enum index_hint_type index_hint;
+ enum enum_filetype filetype;
Diag_condition_item_name diag_condition_item_name;
}
@@ -1116,6 +1117,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token WRITE_SYM /* SQL-2003-N */
%token X509_SYM
%token XA_SYM
+%token XML_SYM
%token XOR
%token YEAR_MONTH_SYM
%token YEAR_SYM /* SQL-2003-R */
@@ -1309,7 +1311,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
prepare prepare_src execute deallocate
statement sp_suid
sp_c_chistics sp_a_chistics sp_chistic sp_c_chistic xa
- load_data opt_field_or_var_spec fields_or_vars opt_load_data_set_spec
+ opt_field_or_var_spec fields_or_vars opt_load_data_set_spec
view_replace_or_algorithm view_replace
view_algorithm view_or_trigger_or_sp_or_event
definer_tail no_definer_tail
@@ -1338,6 +1340,7 @@ END_OF_INPUT
%type <spname> sp_name
%type <index_hint> index_hint_type
%type <num> index_hint_clause
+%type <filetype> data_or_xml
%type <NONE> signal_stmt resignal_stmt
%type <diag_condition_item_name> signal_condition_information_item_name
@@ -10663,7 +10666,7 @@ use:
/* import, export of files */
load:
- LOAD DATA_SYM
+ LOAD data_or_xml
{
THD *thd= YYTHD;
LEX *lex= thd->lex;
@@ -10671,39 +10674,21 @@ load:
if (lex->sphead)
{
- my_error(ER_SP_BADSTATEMENT, MYF(0), "LOAD DATA");
+ my_error(ER_SP_BADSTATEMENT, MYF(0),
+ $2 == FILETYPE_CSV ? "LOAD DATA" : "LOAD XML");
MYSQL_YYABORT;
}
lex->fname_start= lip->get_ptr();
}
- load_data
- {}
- | LOAD TABLE_SYM table_ident FROM MASTER_SYM
- {
- LEX *lex=Lex;
- WARN_DEPRECATED(yythd, "6.0", "LOAD TABLE FROM MASTER",
- "MySQL Administrator (mysqldump, mysql)");
- if (lex->sphead)
- {
- my_error(ER_SP_BADSTATEMENT, MYF(0), "LOAD TABLE");
- MYSQL_YYABORT;
- }
- lex->sql_command = SQLCOM_LOAD_MASTER_TABLE;
- if (!Select->add_table_to_list(YYTHD, $3, NULL, TL_OPTION_UPDATING))
- MYSQL_YYABORT;
- }
- ;
-
-load_data:
load_data_lock opt_local INFILE TEXT_STRING_filesystem
{
LEX *lex=Lex;
lex->sql_command= SQLCOM_LOAD;
- lex->lock_option= $1;
- lex->local_file= $2;
+ lex->lock_option= $4;
+ lex->local_file= $5;
lex->duplicates= DUP_ERROR;
lex->ignore= 0;
- if (!(lex->exchange= new sql_exchange($4.str, 0)))
+ if (!(lex->exchange= new sql_exchange($7.str, 0, $2)))
MYSQL_YYABORT;
}
opt_duplicate INTO
@@ -10713,7 +10698,7 @@ load_data:
TABLE_SYM table_ident
{
LEX *lex=Lex;
- if (!Select->add_table_to_list(YYTHD, $10, NULL, TL_OPTION_UPDATING,
+ if (!Select->add_table_to_list(YYTHD, $13, NULL, TL_OPTION_UPDATING,
lex->lock_option))
MYSQL_YYABORT;
lex->field_list.empty();
@@ -10721,12 +10706,37 @@ load_data:
lex->value_list.empty();
}
opt_load_data_charset
- { Lex->exchange->cs= $12; }
+ { Lex->exchange->cs= $15; }
+ opt_xml_rows_identified_by
opt_field_term opt_line_term opt_ignore_lines opt_field_or_var_spec
opt_load_data_set_spec
{}
- | FROM MASTER_SYM
+ | LOAD TABLE_SYM table_ident FROM MASTER_SYM
{
+ LEX *lex=Lex;
+ WARN_DEPRECATED(yythd, "6.0", "LOAD TABLE FROM MASTER",
+ "MySQL Administrator (mysqldump, mysql)");
+ if (lex->sphead)
+ {
+ my_error(ER_SP_BADSTATEMENT, MYF(0), "LOAD TABLE");
+ MYSQL_YYABORT;
+ }
+ lex->sql_command = SQLCOM_LOAD_MASTER_TABLE;
+ if (!Select->add_table_to_list(YYTHD, $3, NULL, TL_OPTION_UPDATING))
+ MYSQL_YYABORT;
+ }
+ | LOAD DATA_SYM FROM MASTER_SYM
+ {
+ THD *thd= YYTHD;
+ LEX *lex= thd->lex;
+ Lex_input_stream *lip= YYLIP;
+
+ if (lex->sphead)
+ {
+ my_error(ER_SP_BADSTATEMENT, MYF(0), "LOAD DATA");
+ MYSQL_YYABORT;
+ }
+ lex->fname_start= lip->get_ptr();
Lex->sql_command = SQLCOM_LOAD_MASTER_DATA;
WARN_DEPRECATED(yythd, "6.0", "LOAD DATA FROM MASTER",
"mysqldump or future "
@@ -10734,6 +10744,11 @@ load_data:
}
;
+data_or_xml: /* word after LOAD; its value is passed to sql_exchange as the file type */
+ DATA_SYM { $$= FILETYPE_CSV; }
+ | XML_SYM { $$= FILETYPE_XML; }
+ ;
+
opt_local:
/* empty */ { $$=0;}
| LOCAL_SYM { $$=1;}
@@ -10820,15 +10835,25 @@ line_term:
}
;
+opt_xml_rows_identified_by: /* NOTE(review): used in the shared LOAD rule, so it is also accepted after LOAD DATA - confirm this is intended */
+ /* empty: line_term keeps the default set by the sql_exchange constructor */ { }
+ | ROWS_SYM IDENTIFIED_SYM BY text_string /* the row tag string is stored in line_term */
+ { Lex->exchange->line_term = $4; };
+
opt_ignore_lines:
/* empty */
- | IGNORE_SYM NUM LINES
+ | IGNORE_SYM NUM lines_or_rows
{
DBUG_ASSERT(Lex->exchange != 0);
Lex->exchange->skip_lines= atol($2.str);
}
;
+lines_or_rows: /* unit keyword after IGNORE <n>; both forms lead to the same skip_lines assignment */
+ LINES { }
+ | ROWS_SYM { }
+ ;
+
opt_field_or_var_spec:
/* empty */ {}
| '(' fields_or_vars ')' {}
@@ -11924,6 +11949,7 @@ keyword_sp:
| WEEK_SYM {}
| WORK_SYM {}
| X509_SYM {}
+ | XML_SYM {}
| YEAR_SYM {}
;