diff options
-rw-r--r-- | include/my_xml.h | 23 | ||||
-rw-r--r-- | libmysqld/Makefile.am | 1 | ||||
-rw-r--r-- | mysql-test/r/xml.result | 522 | ||||
-rw-r--r-- | mysql-test/t/xml.test | 217 | ||||
-rw-r--r-- | sql/Makefile.am | 3 | ||||
-rw-r--r-- | sql/item.h | 3 | ||||
-rw-r--r-- | sql/item_create.cc | 10 | ||||
-rw-r--r-- | sql/item_create.h | 3 | ||||
-rw-r--r-- | sql/item_xmlfunc.cc | 2572 | ||||
-rw-r--r-- | sql/item_xmlfunc.h | 56 | ||||
-rw-r--r-- | sql/lex.h | 2 | ||||
-rw-r--r-- | strings/xml.c | 21 |
12 files changed, 3426 insertions, 7 deletions
diff --git a/include/my_xml.h b/include/my_xml.h index 82de995e700..0d779ea6fb9 100644 --- a/include/my_xml.h +++ b/include/my_xml.h @@ -26,8 +26,31 @@ extern "C" { #define MY_XML_OK 0 #define MY_XML_ERROR 1 +/* + A flag whether to use absolute tag names in call-back functions, + like "a", "a.b" and "a.b.c" (used in character set file parser), + or relative names like "a", "b" and "c". +*/ +#define MY_XML_FLAG_RELATIVE_NAMES 1 + +/* + A flag whether to skip normalization of text values before calling + call-back functions: i.e. skip leading/trailing spaces, + \r, \n, \t characters. +*/ +#define MY_XML_FLAG_SKIP_TEXT_NORMALIZATION 2 + +enum my_xml_node_type +{ + MY_XML_NODE_TAG, /* can have TAG, ATTR and TEXT children */ + MY_XML_NODE_ATTR, /* can have TEXT children */ + MY_XML_NODE_TEXT /* cannot have children */ +}; + typedef struct xml_stack_st { + int flags; + enum my_xml_node_type current_node_type; char errstr[128]; char attr[128]; char *attrend; diff --git a/libmysqld/Makefile.am b/libmysqld/Makefile.am index da0418eaf9c..12531a553eb 100644 --- a/libmysqld/Makefile.am +++ b/libmysqld/Makefile.am @@ -48,6 +48,7 @@ sqlsources = derror.cc field.cc field_conv.cc strfunc.cc filesort.cc \ item.cc item_buff.cc item_cmpfunc.cc item_create.cc \ item_func.cc item_strfunc.cc item_sum.cc item_timefunc.cc \ item_geofunc.cc item_uniq.cc item_subselect.cc item_row.cc\ + item_xmlfunc.cc \ key.cc lock.cc log.cc log_event.cc sql_state.c \ protocol.cc net_serv.cc opt_range.cc \ opt_sum.cc procedure.cc records.cc sql_acl.cc \ diff --git a/mysql-test/r/xml.result b/mysql-test/r/xml.result new file mode 100644 index 00000000000..693dd6eea81 --- /dev/null +++ b/mysql-test/r/xml.result @@ -0,0 +1,522 @@ +SET @xml='<a aa1="aa1" aa2="aa2">a1<b ba1="ba1">b1<c>c1</c>b2</b>a2</a>'; +SELECT extractValue(@xml,'/a'); +extractValue(@xml,'/a') +a1 a2 +SELECT extractValue(@xml,'/a/b'); +extractValue(@xml,'/a/b') +b1 b2 +SELECT extractValue(@xml,'/a/b/c'); +extractValue(@xml,'/a/b/c') +c1 
+SELECT extractValue(@xml,'/a/@aa1'); +extractValue(@xml,'/a/@aa1') +aa1 +SELECT extractValue(@xml,'/a/@aa2'); +extractValue(@xml,'/a/@aa2') +aa2 +SELECT extractValue(@xml,'/a/@*'); +extractValue(@xml,'/a/@*') +aa1 aa2 +SELECT extractValue(@xml,'//@ba1'); +extractValue(@xml,'//@ba1') +ba1 +SELECT extractValue(@xml,'//a'); +extractValue(@xml,'//a') +a1 a2 +SELECT extractValue(@xml,'//b'); +extractValue(@xml,'//b') +b1 b2 +SELECT extractValue(@xml,'//c'); +extractValue(@xml,'//c') +c1 +SELECT extractValue(@xml,'/a//b'); +extractValue(@xml,'/a//b') +b1 b2 +SELECT extractValue(@xml,'/a//c'); +extractValue(@xml,'/a//c') +c1 +SELECT extractValue(@xml,'//*'); +extractValue(@xml,'//*') +a1 b1 c1 b2 a2 +SELECT extractValue(@xml,'/a//*'); +extractValue(@xml,'/a//*') +b1 c1 b2 +SELECT extractValue(@xml,'/./a'); +extractValue(@xml,'/./a') +a1 a2 +SELECT extractValue(@xml,'/a/b/.'); +extractValue(@xml,'/a/b/.') +b1 b2 +SELECT extractValue(@xml,'/a/b/..'); +extractValue(@xml,'/a/b/..') +a1 a2 +SELECT extractValue(@xml,'/a/b/../@aa1'); +extractValue(@xml,'/a/b/../@aa1') +aa1 +SELECT extractValue(@xml,'/*'); +extractValue(@xml,'/*') +a1 a2 +SELECT extractValue(@xml,'/*/*'); +extractValue(@xml,'/*/*') +b1 b2 +SELECT extractValue(@xml,'/*/*/*'); +extractValue(@xml,'/*/*/*') +c1 +SELECT extractValue(@xml,'/a/child::*'); +extractValue(@xml,'/a/child::*') +b1 b2 +SELECT extractValue(@xml,'/a/descendant::*'); +extractValue(@xml,'/a/descendant::*') +b1 c1 b2 +SELECT extractValue(@xml,'/a/descendant-or-self::*'); +extractValue(@xml,'/a/descendant-or-self::*') +a1 b1 c1 b2 a2 +SELECT extractValue(@xml,'/a/attribute::*'); +extractValue(@xml,'/a/attribute::*') +aa1 aa2 +SELECT extractValue(@xml,'/a/b/c/parent::*'); +extractValue(@xml,'/a/b/c/parent::*') +b1 b2 +SELECT extractValue(@xml,'/a/b/c/ancestor::*'); +extractValue(@xml,'/a/b/c/ancestor::*') +a1 b1 b2 a2 +SELECT extractValue(@xml,'/a/b/c/ancestor-or-self::*'); +extractValue(@xml,'/a/b/c/ancestor-or-self::*') +a1 b1 c1 b2 a2 +SELECT 
extractValue(@xml,'/descendant-or-self::*'); +extractValue(@xml,'/descendant-or-self::*') +a1 b1 c1 b2 a2 +SET @xml='<a>a11<b ba="ba11" ba="ba12">b11</b><b ba="ba21" ba="ba22">b21<c>c1</c>b22</b>a12</a>'; +SELECT extractValue(@xml,'/a/b/c/ancestor-or-self::*'); +extractValue(@xml,'/a/b/c/ancestor-or-self::*') +a11 b21 c1 b22 a12 +SELECT extractValue(@xml,'//@ba'); +extractValue(@xml,'//@ba') +ba11 ba12 ba21 ba22 +SET @xml='<a><b>b</b><c>c</c></a>'; +SELECT extractValue(@xml,'/a/b'); +extractValue(@xml,'/a/b') +b +SELECT extractValue(@xml,'/a/c'); +extractValue(@xml,'/a/c') +c +SELECT extractValue(@xml,'/a/child::b'); +extractValue(@xml,'/a/child::b') +b +SELECT extractValue(@xml,'/a/child::c'); +extractValue(@xml,'/a/child::c') +c +SET @xml='<a><b>b1</b><c>c1</c><b>b2</b><c>c2</c></a>'; +SELECT extractValue(@xml,'/a/b[1]'); +extractValue(@xml,'/a/b[1]') +b1 +SELECT extractValue(@xml,'/a/b[2]'); +extractValue(@xml,'/a/b[2]') +b2 +SELECT extractValue(@xml,'/a/c[1]'); +extractValue(@xml,'/a/c[1]') +c1 +SELECT extractValue(@xml,'/a/c[2]'); +extractValue(@xml,'/a/c[2]') +c2 +SET @xml='<a><b x="xb1" x="xb2"/><c x="xc1" x="xc2"/></a>'; +SELECT extractValue(@xml,'/a//@x'); +extractValue(@xml,'/a//@x') +xb1 xb2 xc1 xc2 +SELECT extractValue(@xml,'/a//@x[1]'); +extractValue(@xml,'/a//@x[1]') +xb1 xc1 +SELECT extractValue(@xml,'/a//@x[2]'); +extractValue(@xml,'/a//@x[2]') +xb2 xc2 +SET @xml='<a><b>b1</b><b>b2</b><c><b>c1b1</b><b>c1b2</b></c><c><b>c2b1</c></b>/a>'; +SELECT extractValue(@xml,'//b[1]'); +extractValue(@xml,'//b[1]') +b1 c1b1 c2b1 +SELECT extractValue(@xml,'/descendant::b[1]'); +extractValue(@xml,'/descendant::b[1]') +b1 +SET @xml='<a><b>b1</b><b>b2</b></a>'; +SELECT extractValue(@xml,'/a/b[1+0]'); +extractValue(@xml,'/a/b[1+0]') +b1 +SELECT extractValue(@xml,'/a/b[1*1]'); +extractValue(@xml,'/a/b[1*1]') +b1 +SELECT extractValue(@xml,'/a/b[--1]'); +extractValue(@xml,'/a/b[--1]') +b1 +SELECT extractValue(@xml,'/a/b[2*1-1]'); +extractValue(@xml,'/a/b[2*1-1]') +b1 
+SELECT extractValue(@xml,'/a/b[1+1]'); +extractValue(@xml,'/a/b[1+1]') +b2 +SELECT extractValue(@xml,'/a/b[1*2]'); +extractValue(@xml,'/a/b[1*2]') +b2 +SELECT extractValue(@xml,'/a/b[--2]'); +extractValue(@xml,'/a/b[--2]') +b2 +SELECT extractValue(@xml,'/a/b[1*(3-1)]'); +extractValue(@xml,'/a/b[1*(3-1)]') +b2 +SELECT extractValue(@xml,'//*[1=1]'); +extractValue(@xml,'//*[1=1]') +b1 b2 +SELECT extractValue(@xml,'//*[1!=1]'); +extractValue(@xml,'//*[1!=1]') + +SELECT extractValue(@xml,'//*[1>1]'); +extractValue(@xml,'//*[1>1]') + +SELECT extractValue(@xml,'//*[2>1]'); +extractValue(@xml,'//*[2>1]') +b1 b2 +SELECT extractValue(@xml,'//*[1>2]'); +extractValue(@xml,'//*[1>2]') + +SELECT extractValue(@xml,'//*[1>=1]'); +extractValue(@xml,'//*[1>=1]') +b1 b2 +SELECT extractValue(@xml,'//*[2>=1]'); +extractValue(@xml,'//*[2>=1]') +b1 b2 +SELECT extractValue(@xml,'//*[1>=2]'); +extractValue(@xml,'//*[1>=2]') + +SELECT extractValue(@xml,'//*[1<1]'); +extractValue(@xml,'//*[1<1]') + +SELECT extractValue(@xml,'//*[2<1]'); +extractValue(@xml,'//*[2<1]') + +SELECT extractValue(@xml,'//*[1<2]'); +extractValue(@xml,'//*[1<2]') +b1 b2 +SELECT extractValue(@xml,'//*[1<=1]'); +extractValue(@xml,'//*[1<=1]') +b1 b2 +SELECT extractValue(@xml,'//*[2<=1]'); +extractValue(@xml,'//*[2<=1]') + +SELECT extractValue(@xml,'//*[1<=2]'); +extractValue(@xml,'//*[1<=2]') +b1 b2 +SET @xml='<a><b>b11<c>c11</c></b><b>b21<c>c21</c></b></a>'; +SELECT extractValue(@xml,'/a/b[c="c11"]'); +extractValue(@xml,'/a/b[c="c11"]') +b11 +SELECT extractValue(@xml,'/a/b[c="c21"]'); +extractValue(@xml,'/a/b[c="c21"]') +b21 +SET @xml='<a><b c="c11">b11</b><b c="c21">b21</b></a>'; +SELECT extractValue(@xml,'/a/b[@c="c11"]'); +extractValue(@xml,'/a/b[@c="c11"]') +b11 +SELECT extractValue(@xml,'/a/b[@c="c21"]'); +extractValue(@xml,'/a/b[@c="c21"]') +b21 +SET @xml='<a>a1<b c="c11">b11<d>d11</d></b><b c="c21">b21<d>d21</d></b></a>'; +SELECT extractValue(@xml, '/a/b[@c="c11"]/d'); +extractValue(@xml, '/a/b[@c="c11"]/d') 
+d11 +SELECT extractValue(@xml, '/a/b[@c="c21"]/d'); +extractValue(@xml, '/a/b[@c="c21"]/d') +d21 +SELECT extractValue(@xml, '/a/b[d="d11"]/@c'); +extractValue(@xml, '/a/b[d="d11"]/@c') +c11 +SELECT extractValue(@xml, '/a/b[d="d21"]/@c'); +extractValue(@xml, '/a/b[d="d21"]/@c') +c21 +SELECT extractValue(@xml, '/a[b="b11"]'); +extractValue(@xml, '/a[b="b11"]') +a1 +SELECT extractValue(@xml, '/a[b/@c="c11"]'); +extractValue(@xml, '/a[b/@c="c11"]') +a1 +SELECT extractValue(@xml, '/a[b/d="d11"]'); +extractValue(@xml, '/a[b/d="d11"]') +a1 +SELECT extractValue(@xml, '/a[/a/b="b11"]'); +extractValue(@xml, '/a[/a/b="b11"]') +a1 +SELECT extractValue(@xml, '/a[/a/b/@c="c11"]'); +extractValue(@xml, '/a[/a/b/@c="c11"]') +a1 +SELECT extractValue(@xml, '/a[/a/b/d="d11"]'); +extractValue(@xml, '/a[/a/b/d="d11"]') +a1 +SELECT extractValue('<a>a</a>', '/a[false()]'); +extractValue('<a>a</a>', '/a[false()]') + +SELECT extractValue('<a>a</a>', '/a[true()]'); +extractValue('<a>a</a>', '/a[true()]') +a +SELECT extractValue('<a>a</a>', '/a[not(false())]'); +extractValue('<a>a</a>', '/a[not(false())]') +a +SELECT extractValue('<a>a</a>', '/a[not(true())]'); +extractValue('<a>a</a>', '/a[not(true())]') + +SELECT extractValue('<a>a</a>', '/a[true() and true()]'); +extractValue('<a>a</a>', '/a[true() and true()]') +a +SELECT extractValue('<a>a</a>', '/a[true() and false()]'); +extractValue('<a>a</a>', '/a[true() and false()]') + +SELECT extractValue('<a>a</a>', '/a[false()and false()]'); +extractValue('<a>a</a>', '/a[false()and false()]') + +SELECT extractValue('<a>a</a>', '/a[false()and true()]'); +extractValue('<a>a</a>', '/a[false()and true()]') + +SELECT extractValue('<a>a</a>', '/a[true() or true()]'); +extractValue('<a>a</a>', '/a[true() or true()]') +a +SELECT extractValue('<a>a</a>', '/a[true() or false()]'); +extractValue('<a>a</a>', '/a[true() or false()]') +a +SELECT extractValue('<a>a</a>', '/a[false()or false()]'); +extractValue('<a>a</a>', '/a[false()or false()]') + +SELECT 
extractValue('<a>a</a>', '/a[false()or true()]'); +extractValue('<a>a</a>', '/a[false()or true()]') +a +SET @xml='<a>ab<b c="c" c="e">b1</b><b c="d">b2</b><b c="f" c="e">b3</b></a>'; +select extractValue(@xml,'/a/b[@c="c"]'); +extractValue(@xml,'/a/b[@c="c"]') +b1 +select extractValue(@xml,'/a/b[@c="d"]'); +extractValue(@xml,'/a/b[@c="d"]') +b2 +select extractValue(@xml,'/a/b[@c="e"]'); +extractValue(@xml,'/a/b[@c="e"]') +b1 b3 +select extractValue(@xml,'/a/b[not(@c="e")]'); +extractValue(@xml,'/a/b[not(@c="e")]') +b2 +select extractValue(@xml,'/a/b[@c!="e"]'); +extractValue(@xml,'/a/b[@c!="e"]') +b1 b2 b3 +select extractValue(@xml,'/a/b[@c="c" or @c="d"]'); +extractValue(@xml,'/a/b[@c="c" or @c="d"]') +b1 b2 +select extractValue(@xml,'/a/b[@c="c" and @c="e"]'); +extractValue(@xml,'/a/b[@c="c" and @c="e"]') +b1 +SET @xml='<a><b c="c" d="d">b1</b><b d="d" e="e">b2</b></a>'; +select extractValue(@xml,'/a/b[@c]'); +extractValue(@xml,'/a/b[@c]') +b1 +select extractValue(@xml,'/a/b[@d]'); +extractValue(@xml,'/a/b[@d]') +b1 b2 +select extractValue(@xml,'/a/b[@e]'); +extractValue(@xml,'/a/b[@e]') +b2 +select extractValue(@xml,'/a/b[not(@c)]'); +extractValue(@xml,'/a/b[not(@c)]') +b2 +select extractValue(@xml,'/a/b[not(@d)]'); +extractValue(@xml,'/a/b[not(@d)]') + +select extractValue(@xml,'/a/b[not(@e)]'); +extractValue(@xml,'/a/b[not(@e)]') +b1 +select extractValue(@xml, '/a/b[boolean(@c) or boolean(@d)]'); +extractValue(@xml, '/a/b[boolean(@c) or boolean(@d)]') +b1 b2 +select extractValue(@xml, '/a/b[boolean(@c) or boolean(@e)]'); +extractValue(@xml, '/a/b[boolean(@c) or boolean(@e)]') +b1 b2 +select extractValue(@xml, '/a/b[boolean(@d) or boolean(@e)]'); +extractValue(@xml, '/a/b[boolean(@d) or boolean(@e)]') +b1 b2 +select extractValue(@xml, '/a/b[boolean(@c) and boolean(@d)]'); +extractValue(@xml, '/a/b[boolean(@c) and boolean(@d)]') +b1 +select extractValue(@xml, '/a/b[boolean(@c) and boolean(@e)]'); +extractValue(@xml, '/a/b[boolean(@c) and boolean(@e)]') + +select 
extractValue(@xml, '/a/b[boolean(@d) and boolean(@e)]'); +extractValue(@xml, '/a/b[boolean(@d) and boolean(@e)]') +b2 +select extractValue(@xml, '/a/b[@c or @d]'); +extractValue(@xml, '/a/b[@c or @d]') +b1 b2 +select extractValue(@xml, '/a/b[@c or @e]'); +extractValue(@xml, '/a/b[@c or @e]') +b1 b2 +select extractValue(@xml, '/a/b[@d or @e]'); +extractValue(@xml, '/a/b[@d or @e]') +b1 b2 +select extractValue(@xml, '/a/b[@c and @d]'); +extractValue(@xml, '/a/b[@c and @d]') +b1 +select extractValue(@xml, '/a/b[@c and @e]'); +extractValue(@xml, '/a/b[@c and @e]') + +select extractValue(@xml, '/a/b[@d and @e]'); +extractValue(@xml, '/a/b[@d and @e]') +b2 +SET @xml='<a><b c="c">b1</b><b>b2</b></a>'; +SELECT extractValue(@xml,'/a/b[@*]'); +extractValue(@xml,'/a/b[@*]') +b1 +SELECT extractValue(@xml,'/a/b[not(@*)]'); +extractValue(@xml,'/a/b[not(@*)]') +b2 +SELECT extractValue('<a>a</a>', '/a[ceiling(3.1)=4]'); +extractValue('<a>a</a>', '/a[ceiling(3.1)=4]') +a +SELECT extractValue('<a>a</a>', '/a[floor(3.1)=3]'); +extractValue('<a>a</a>', '/a[floor(3.1)=3]') +a +SELECT extractValue('<a>a</a>', '/a[round(3.1)=3]'); +extractValue('<a>a</a>', '/a[round(3.1)=3]') +a +SELECT extractValue('<a>a</a>', '/a[round(3.8)=4]'); +extractValue('<a>a</a>', '/a[round(3.8)=4]') +a +SELECT extractValue('<a><b>b</b><c>c</c></a>', '/a/b | /a/c'); +extractValue('<a><b>b</b><c>c</c></a>', '/a/b | /a/c') +b c +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[position()=1]'); +extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[position()=1]') +b1 +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[position()=2]'); +extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[position()=2]') +b2 +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[position()=3]'); +extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[position()=3]') +b3 +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[1=position()]'); +extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[1=position()]') +b1 +select extractValue('<a 
b="b1" b="b2" b="b3"/>','/a/@b[2=position()]'); +extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[2=position()]') +b2 +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[3=position()]'); +extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[3=position()]') +b3 +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[2>=position()]'); +extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[2>=position()]') +b1 b2 +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[2<=position()]'); +extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[2<=position()]') +b2 b3 +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[position()=3 or position()=2]'); +extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[position()=3 or position()=2]') +b2 b3 +SELECT extractValue('<a>a<b>a1<c>c1</c></b><b>a2</b></a>','/a/b[count(c)=0]'); +extractValue('<a>a<b>a1<c>c1</c></b><b>a2</b></a>','/a/b[count(c)=0]') +a2 +SELECT extractValue('<a>a<b>a1<c>c1</c></b><b>a2</b></a>','/a/b[count(c)=1]'); +extractValue('<a>a<b>a1<c>c1</c></b><b>a2</b></a>','/a/b[count(c)=1]') +a1 +select extractValue('<a>a1<b ba="1" ba="2">b1</b><b>b2</b>4</a>','/a/b[sum(@ba)=3]'); +extractValue('<a>a1<b ba="1" ba="2">b1</b><b>b2</b>4</a>','/a/b[sum(@ba)=3]') +b1 +select extractValue('<a><b>b1</b><b>b2</b></a>','/a/b[1]'); +extractValue('<a><b>b1</b><b>b2</b></a>','/a/b[1]') +b1 +select extractValue('<a><b>b1</b><b>b2</b></a>','/a/b[boolean(1)]'); +extractValue('<a><b>b1</b><b>b2</b></a>','/a/b[boolean(1)]') +b1 b2 +select extractValue('<a><b>b1</b><b>b2</b></a>','/a/b[true()]'); +extractValue('<a><b>b1</b><b>b2</b></a>','/a/b[true()]') +b1 b2 +select extractValue('<a><b>b1</b><b>b2</b></a>','/a/b[number(true())]'); +extractValue('<a><b>b1</b><b>b2</b></a>','/a/b[number(true())]') +b1 +select extractValue('<a>ab</a>','/a[contains("abc","b")]'); +extractValue('<a>ab</a>','/a[contains("abc","b")]') +ab +select extractValue('<a>ab</a>','/a[contains(.,"a")]'); +extractValue('<a>ab</a>','/a[contains(.,"a")]') +ab +select 
extractValue('<a>ab</a>','/a[contains(.,"b")]'); +extractValue('<a>ab</a>','/a[contains(.,"b")]') +ab +select extractValue('<a>ab</a>','/a[contains(.,"c")]'); +extractValue('<a>ab</a>','/a[contains(.,"c")]') + +select extractValue('<a b="1">ab</a>','/a[concat(@b,"2")="12"]'); +extractValue('<a b="1">ab</a>','/a[concat(@b,"2")="12"]') +ab +SET @xml='<a b="11" b="12" b="21" b="22">ab</a>'; +select extractValue(@xml, '/a/@b[substring(.,2)="1"]'); +extractValue(@xml, '/a/@b[substring(.,2)="1"]') +11 21 +select extractValue(@xml, '/a/@b[substring(.,2)="2"]'); +extractValue(@xml, '/a/@b[substring(.,2)="2"]') +12 22 +select extractValue(@xml, '/a/@b[substring(.,1,1)="1"]'); +extractValue(@xml, '/a/@b[substring(.,1,1)="1"]') +11 12 +select extractValue(@xml, '/a/@b[substring(.,1,1)="2"]'); +extractValue(@xml, '/a/@b[substring(.,1,1)="2"]') +21 22 +select extractValue(@xml, '/a/@b[substring(.,2,1)="1"]'); +extractValue(@xml, '/a/@b[substring(.,2,1)="1"]') +11 21 +select extractValue(@xml, '/a/@b[substring(.,2,1)="2"]'); +extractValue(@xml, '/a/@b[substring(.,2,1)="2"]') +12 22 +SET @xml='<a b="b11" b="b12" b="b21" b="22"/>'; +select extractValue(@xml,'/a/@b'); +extractValue(@xml,'/a/@b') +b11 b12 b21 22 +select extractValue(@xml,'/a/@b[contains(.,"1")]'); +extractValue(@xml,'/a/@b[contains(.,"1")]') +b11 b12 b21 +select extractValue(@xml,'/a/@b[contains(.,"1")][contains(.,"2")]'); +extractValue(@xml,'/a/@b[contains(.,"1")][contains(.,"2")]') +b12 b21 +select extractValue(@xml,'/a/@b[contains(.,"1")][contains(.,"2")][2]'); +extractValue(@xml,'/a/@b[contains(.,"1")][contains(.,"2")][2]') +b21 +SET @xml='<a>a1<b>b1<c>c1</c>b2</b>a2</a>'; +select UpdateXML('<a>a1<b>b1<c>c1</c>b2</b>a2</a>','/a/b/c','+++++++++'); +UpdateXML('<a>a1<b>b1<c>c1</c>b2</b>a2</a>','/a/b/c','+++++++++') +<a>a1<b>b1+++++++++b2</b>a2</a> +select UpdateXML('<a>a1<b>b1<c>c1</c>b2</b>a2</a>','/a/b/c','<c1>+++++++++</c1>'); +UpdateXML('<a>a1<b>b1<c>c1</c>b2</b>a2</a>','/a/b/c','<c1>+++++++++</c1>') 
+<a>a1<b>b1<c1>+++++++++</c1>b2</b>a2</a> +select UpdateXML('<a>a1<b>b1<c>c1</c>b2</b>a2</a>','/a/b/c','<c1/>'); +UpdateXML('<a>a1<b>b1<c>c1</c>b2</b>a2</a>','/a/b/c','<c1/>') +<a>a1<b>b1<c1/>b2</b>a2</a> +SET @xml='<a><b>bb</b></a>'; +select UpdateXML(@xml, '/a/b', '<b>ccc</b>'); +UpdateXML(@xml, '/a/b', '<b>ccc</b>') +<a><b>ccc</b></a> +SET @xml='<a aa1="aa1" aa2="aa2"><b bb1="bb1" bb2="bb2">bb</b></a>'; +select UpdateXML(@xml, '/a/b', '<b>ccc</b>'); +UpdateXML(@xml, '/a/b', '<b>ccc</b>') +<a aa1="aa1" aa2="aa2"><b>ccc</b></a> +select UpdateXML(@xml, '/a/@aa1', ''); +UpdateXML(@xml, '/a/@aa1', '') +<a aa2="aa2"><b bb1="bb1" bb2="bb2">bb</b></a> +select UpdateXML(@xml, '/a/@aa1', 'aa3="aa3"'); +UpdateXML(@xml, '/a/@aa1', 'aa3="aa3"') +<a aa3="aa3" aa2="aa2"><b bb1="bb1" bb2="bb2">bb</b></a> +select UpdateXML(@xml, '/a/@aa2', ''); +UpdateXML(@xml, '/a/@aa2', '') +<a aa1="aa1" ><b bb1="bb1" bb2="bb2">bb</b></a> +select UpdateXML(@xml, '/a/@aa2', 'aa3="aa3"'); +UpdateXML(@xml, '/a/@aa2', 'aa3="aa3"') +<a aa1="aa1" aa3="aa3"><b bb1="bb1" bb2="bb2">bb</b></a> +select UpdateXML(@xml, '/a/b/@bb1', ''); +UpdateXML(@xml, '/a/b/@bb1', '') +<a aa1="aa1" aa2="aa2"><b bb2="bb2">bb</b></a> +select UpdateXML(@xml, '/a/b/@bb1', 'bb3="bb3"'); +UpdateXML(@xml, '/a/b/@bb1', 'bb3="bb3"') +<a aa1="aa1" aa2="aa2"><b bb3="bb3" bb2="bb2">bb</b></a> +select UpdateXML(@xml, '/a/b/@bb2', ''); +UpdateXML(@xml, '/a/b/@bb2', '') +<a aa1="aa1" aa2="aa2"><b bb1="bb1" >bb</b></a> +select UpdateXML(@xml, '/a/b/@bb2', 'bb3="bb3"'); +UpdateXML(@xml, '/a/b/@bb2', 'bb3="bb3"') +<a aa1="aa1" aa2="aa2"><b bb1="bb1" bb3="bb3">bb</b></a> diff --git a/mysql-test/t/xml.test b/mysql-test/t/xml.test new file mode 100644 index 00000000000..472f6d49300 --- /dev/null +++ b/mysql-test/t/xml.test @@ -0,0 +1,217 @@ +SET @xml='<a aa1="aa1" aa2="aa2">a1<b ba1="ba1">b1<c>c1</c>b2</b>a2</a>'; +SELECT extractValue(@xml,'/a'); +SELECT extractValue(@xml,'/a/b'); +SELECT extractValue(@xml,'/a/b/c'); +SELECT 
extractValue(@xml,'/a/@aa1'); +SELECT extractValue(@xml,'/a/@aa2'); +SELECT extractValue(@xml,'/a/@*'); +SELECT extractValue(@xml,'//@ba1'); + +SELECT extractValue(@xml,'//a'); +SELECT extractValue(@xml,'//b'); +SELECT extractValue(@xml,'//c'); +SELECT extractValue(@xml,'/a//b'); +SELECT extractValue(@xml,'/a//c'); +SELECT extractValue(@xml,'//*'); +SELECT extractValue(@xml,'/a//*'); +SELECT extractValue(@xml,'/./a'); +SELECT extractValue(@xml,'/a/b/.'); +SELECT extractValue(@xml,'/a/b/..'); +SELECT extractValue(@xml,'/a/b/../@aa1'); +SELECT extractValue(@xml,'/*'); +SELECT extractValue(@xml,'/*/*'); +SELECT extractValue(@xml,'/*/*/*'); + +SELECT extractValue(@xml,'/a/child::*'); +SELECT extractValue(@xml,'/a/descendant::*'); +SELECT extractValue(@xml,'/a/descendant-or-self::*'); +SELECT extractValue(@xml,'/a/attribute::*'); +SELECT extractValue(@xml,'/a/b/c/parent::*'); +SELECT extractValue(@xml,'/a/b/c/ancestor::*'); +SELECT extractValue(@xml,'/a/b/c/ancestor-or-self::*'); +SELECT extractValue(@xml,'/descendant-or-self::*'); + +SET @xml='<a>a11<b ba="ba11" ba="ba12">b11</b><b ba="ba21" ba="ba22">b21<c>c1</c>b22</b>a12</a>'; +SELECT extractValue(@xml,'/a/b/c/ancestor-or-self::*'); +SELECT extractValue(@xml,'//@ba'); + +SET @xml='<a><b>b</b><c>c</c></a>'; +SELECT extractValue(@xml,'/a/b'); +SELECT extractValue(@xml,'/a/c'); +SELECT extractValue(@xml,'/a/child::b'); +SELECT extractValue(@xml,'/a/child::c'); + +SET @xml='<a><b>b1</b><c>c1</c><b>b2</b><c>c2</c></a>'; +SELECT extractValue(@xml,'/a/b[1]'); +SELECT extractValue(@xml,'/a/b[2]'); +SELECT extractValue(@xml,'/a/c[1]'); +SELECT extractValue(@xml,'/a/c[2]'); + +SET @xml='<a><b x="xb1" x="xb2"/><c x="xc1" x="xc2"/></a>'; +SELECT extractValue(@xml,'/a//@x'); +SELECT extractValue(@xml,'/a//@x[1]'); +SELECT extractValue(@xml,'/a//@x[2]'); + +SET @xml='<a><b>b1</b><b>b2</b><c><b>c1b1</b><b>c1b2</b></c><c><b>c2b1</c></b>/a>'; +SELECT extractValue(@xml,'//b[1]'); +SELECT extractValue(@xml,'/descendant::b[1]'); + +SET 
@xml='<a><b>b1</b><b>b2</b></a>'; +SELECT extractValue(@xml,'/a/b[1+0]'); +SELECT extractValue(@xml,'/a/b[1*1]'); +SELECT extractValue(@xml,'/a/b[--1]'); +SELECT extractValue(@xml,'/a/b[2*1-1]'); + +SELECT extractValue(@xml,'/a/b[1+1]'); +SELECT extractValue(@xml,'/a/b[1*2]'); +SELECT extractValue(@xml,'/a/b[--2]'); +SELECT extractValue(@xml,'/a/b[1*(3-1)]'); + +SELECT extractValue(@xml,'//*[1=1]'); +SELECT extractValue(@xml,'//*[1!=1]'); +SELECT extractValue(@xml,'//*[1>1]'); +SELECT extractValue(@xml,'//*[2>1]'); +SELECT extractValue(@xml,'//*[1>2]'); +SELECT extractValue(@xml,'//*[1>=1]'); +SELECT extractValue(@xml,'//*[2>=1]'); +SELECT extractValue(@xml,'//*[1>=2]'); +SELECT extractValue(@xml,'//*[1<1]'); +SELECT extractValue(@xml,'//*[2<1]'); +SELECT extractValue(@xml,'//*[1<2]'); +SELECT extractValue(@xml,'//*[1<=1]'); +SELECT extractValue(@xml,'//*[2<=1]'); +SELECT extractValue(@xml,'//*[1<=2]'); + +SET @xml='<a><b>b11<c>c11</c></b><b>b21<c>c21</c></b></a>'; +SELECT extractValue(@xml,'/a/b[c="c11"]'); +SELECT extractValue(@xml,'/a/b[c="c21"]'); + +SET @xml='<a><b c="c11">b11</b><b c="c21">b21</b></a>'; +SELECT extractValue(@xml,'/a/b[@c="c11"]'); +SELECT extractValue(@xml,'/a/b[@c="c21"]'); + +SET @xml='<a>a1<b c="c11">b11<d>d11</d></b><b c="c21">b21<d>d21</d></b></a>'; +SELECT extractValue(@xml, '/a/b[@c="c11"]/d'); +SELECT extractValue(@xml, '/a/b[@c="c21"]/d'); +SELECT extractValue(@xml, '/a/b[d="d11"]/@c'); +SELECT extractValue(@xml, '/a/b[d="d21"]/@c'); +SELECT extractValue(@xml, '/a[b="b11"]'); +SELECT extractValue(@xml, '/a[b/@c="c11"]'); +SELECT extractValue(@xml, '/a[b/d="d11"]'); +SELECT extractValue(@xml, '/a[/a/b="b11"]'); +SELECT extractValue(@xml, '/a[/a/b/@c="c11"]'); +SELECT extractValue(@xml, '/a[/a/b/d="d11"]'); + +SELECT extractValue('<a>a</a>', '/a[false()]'); +SELECT extractValue('<a>a</a>', '/a[true()]'); +SELECT extractValue('<a>a</a>', '/a[not(false())]'); +SELECT extractValue('<a>a</a>', '/a[not(true())]'); +SELECT 
extractValue('<a>a</a>', '/a[true() and true()]'); +SELECT extractValue('<a>a</a>', '/a[true() and false()]'); +SELECT extractValue('<a>a</a>', '/a[false()and false()]'); +SELECT extractValue('<a>a</a>', '/a[false()and true()]'); +SELECT extractValue('<a>a</a>', '/a[true() or true()]'); +SELECT extractValue('<a>a</a>', '/a[true() or false()]'); +SELECT extractValue('<a>a</a>', '/a[false()or false()]'); +SELECT extractValue('<a>a</a>', '/a[false()or true()]'); + +SET @xml='<a>ab<b c="c" c="e">b1</b><b c="d">b2</b><b c="f" c="e">b3</b></a>'; +select extractValue(@xml,'/a/b[@c="c"]'); +select extractValue(@xml,'/a/b[@c="d"]'); +select extractValue(@xml,'/a/b[@c="e"]'); +select extractValue(@xml,'/a/b[not(@c="e")]'); +select extractValue(@xml,'/a/b[@c!="e"]'); +select extractValue(@xml,'/a/b[@c="c" or @c="d"]'); +select extractValue(@xml,'/a/b[@c="c" and @c="e"]'); + +SET @xml='<a><b c="c" d="d">b1</b><b d="d" e="e">b2</b></a>'; +select extractValue(@xml,'/a/b[@c]'); +select extractValue(@xml,'/a/b[@d]'); +select extractValue(@xml,'/a/b[@e]'); +select extractValue(@xml,'/a/b[not(@c)]'); +select extractValue(@xml,'/a/b[not(@d)]'); +select extractValue(@xml,'/a/b[not(@e)]'); + +select extractValue(@xml, '/a/b[boolean(@c) or boolean(@d)]'); +select extractValue(@xml, '/a/b[boolean(@c) or boolean(@e)]'); +select extractValue(@xml, '/a/b[boolean(@d) or boolean(@e)]'); +select extractValue(@xml, '/a/b[boolean(@c) and boolean(@d)]'); +select extractValue(@xml, '/a/b[boolean(@c) and boolean(@e)]'); +select extractValue(@xml, '/a/b[boolean(@d) and boolean(@e)]'); + +select extractValue(@xml, '/a/b[@c or @d]'); +select extractValue(@xml, '/a/b[@c or @e]'); +select extractValue(@xml, '/a/b[@d or @e]'); +select extractValue(@xml, '/a/b[@c and @d]'); +select extractValue(@xml, '/a/b[@c and @e]'); +select extractValue(@xml, '/a/b[@d and @e]'); + +SET @xml='<a><b c="c">b1</b><b>b2</b></a>'; +SELECT extractValue(@xml,'/a/b[@*]'); +SELECT extractValue(@xml,'/a/b[not(@*)]'); + +SELECT 
extractValue('<a>a</a>', '/a[ceiling(3.1)=4]'); +SELECT extractValue('<a>a</a>', '/a[floor(3.1)=3]'); +SELECT extractValue('<a>a</a>', '/a[round(3.1)=3]'); +SELECT extractValue('<a>a</a>', '/a[round(3.8)=4]'); + +SELECT extractValue('<a><b>b</b><c>c</c></a>', '/a/b | /a/c'); + +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[position()=1]'); +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[position()=2]'); +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[position()=3]'); +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[1=position()]'); +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[2=position()]'); +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[3=position()]'); +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[2>=position()]'); +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[2<=position()]'); +select extractValue('<a b="b1" b="b2" b="b3"/>','/a/@b[position()=3 or position()=2]'); + +SELECT extractValue('<a>a<b>a1<c>c1</c></b><b>a2</b></a>','/a/b[count(c)=0]'); +SELECT extractValue('<a>a<b>a1<c>c1</c></b><b>a2</b></a>','/a/b[count(c)=1]'); +select extractValue('<a>a1<b ba="1" ba="2">b1</b><b>b2</b>4</a>','/a/b[sum(@ba)=3]'); + +select extractValue('<a><b>b1</b><b>b2</b></a>','/a/b[1]'); +select extractValue('<a><b>b1</b><b>b2</b></a>','/a/b[boolean(1)]'); +select extractValue('<a><b>b1</b><b>b2</b></a>','/a/b[true()]'); +select extractValue('<a><b>b1</b><b>b2</b></a>','/a/b[number(true())]'); + +select extractValue('<a>ab</a>','/a[contains("abc","b")]'); +select extractValue('<a>ab</a>','/a[contains(.,"a")]'); +select extractValue('<a>ab</a>','/a[contains(.,"b")]'); +select extractValue('<a>ab</a>','/a[contains(.,"c")]'); + +select extractValue('<a b="1">ab</a>','/a[concat(@b,"2")="12"]'); + +SET @xml='<a b="11" b="12" b="21" b="22">ab</a>'; +select extractValue(@xml, '/a/@b[substring(.,2)="1"]'); +select extractValue(@xml, '/a/@b[substring(.,2)="2"]'); +select extractValue(@xml, 
'/a/@b[substring(.,1,1)="1"]'); +select extractValue(@xml, '/a/@b[substring(.,1,1)="2"]'); +select extractValue(@xml, '/a/@b[substring(.,2,1)="1"]'); +select extractValue(@xml, '/a/@b[substring(.,2,1)="2"]'); + +SET @xml='<a b="b11" b="b12" b="b21" b="22"/>'; +select extractValue(@xml,'/a/@b'); +select extractValue(@xml,'/a/@b[contains(.,"1")]'); +select extractValue(@xml,'/a/@b[contains(.,"1")][contains(.,"2")]'); +select extractValue(@xml,'/a/@b[contains(.,"1")][contains(.,"2")][2]'); + +SET @xml='<a>a1<b>b1<c>c1</c>b2</b>a2</a>'; +select UpdateXML('<a>a1<b>b1<c>c1</c>b2</b>a2</a>','/a/b/c','+++++++++'); +select UpdateXML('<a>a1<b>b1<c>c1</c>b2</b>a2</a>','/a/b/c','<c1>+++++++++</c1>'); +select UpdateXML('<a>a1<b>b1<c>c1</c>b2</b>a2</a>','/a/b/c','<c1/>'); + +SET @xml='<a><b>bb</b></a>'; +select UpdateXML(@xml, '/a/b', '<b>ccc</b>'); + +SET @xml='<a aa1="aa1" aa2="aa2"><b bb1="bb1" bb2="bb2">bb</b></a>'; +select UpdateXML(@xml, '/a/b', '<b>ccc</b>'); +select UpdateXML(@xml, '/a/@aa1', ''); +select UpdateXML(@xml, '/a/@aa1', 'aa3="aa3"'); +select UpdateXML(@xml, '/a/@aa2', ''); +select UpdateXML(@xml, '/a/@aa2', 'aa3="aa3"'); +select UpdateXML(@xml, '/a/b/@bb1', ''); +select UpdateXML(@xml, '/a/b/@bb1', 'bb3="bb3"'); +select UpdateXML(@xml, '/a/b/@bb2', ''); +select UpdateXML(@xml, '/a/b/@bb2', 'bb3="bb3"'); diff --git a/sql/Makefile.am b/sql/Makefile.am index 1090c6d48c7..f3363cc445e 100644 --- a/sql/Makefile.am +++ b/sql/Makefile.am @@ -46,6 +46,7 @@ mysqld_LDADD = @MYSQLD_EXTRA_LDFLAGS@ \ @yassl_libs@ @openssl_libs@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \ item_strfunc.h item_timefunc.h item_uniq.h \ + item_xmlfunc.h \ item_create.h item_subselect.h item_row.h \ mysql_priv.h item_geofunc.h sql_bitmap.h \ procedure.h sql_class.h sql_lex.h sql_list.h \ @@ -67,7 +68,7 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc sql_partition.cc \ item.cc item_sum.cc item_buff.cc item_func.cc \ item_cmpfunc.cc item_strfunc.cc item_timefunc.cc \ 
thr_malloc.cc item_create.cc item_subselect.cc \ - item_row.cc item_geofunc.cc \ + item_row.cc item_geofunc.cc item_xmlfunc.cc \ field.cc strfunc.cc key.cc sql_class.cc sql_list.cc \ net_serv.cc protocol.cc sql_state.c \ lock.cc my_lock.c \ diff --git a/sql/item.h b/sql/item.h index 89f673c47f5..163f11247df 100644 --- a/sql/item.h +++ b/sql/item.h @@ -393,6 +393,7 @@ public: FIELD_VARIANCE_ITEM, INSERT_VALUE_ITEM, SUBSELECT_ITEM, ROW_ITEM, CACHE_ITEM, TYPE_HOLDER, PARAM_ITEM, TRIGGER_FIELD_ITEM, DECIMAL_ITEM, + XPATH_NODESET, XPATH_NODESET_CMP, VIEW_FIXER_ITEM}; enum cond_result { COND_UNDEF,COND_OK,COND_TRUE,COND_FALSE }; @@ -555,6 +556,7 @@ public: TRUE value is true (not equal to 0) */ virtual bool val_bool(); + virtual String *val_nodeset(String*) { return 0; } /* Helper functions, see item_sum.cc */ String *val_string_from_real(String *str); String *val_string_from_int(String *str); @@ -1911,6 +1913,7 @@ public: #include "item_timefunc.h" #include "item_uniq.h" #include "item_subselect.h" +#include "item_xmlfunc.h" class Item_copy_string :public Item { diff --git a/sql/item_create.cc b/sql/item_create.cc index e8435be2614..17f1fbca471 100644 --- a/sql/item_create.cc +++ b/sql/item_create.cc @@ -502,6 +502,16 @@ Item *create_func_quote(Item* a) return new Item_func_quote(a); } +Item *create_func_xml_extractvalue(Item *a, Item *b) +{ + return new Item_func_xml_extractvalue(a, b); +} + +Item *create_func_xml_update(Item *a, Item *b, Item *c) +{ + return new Item_func_xml_update(a, b, c); +} + #ifdef HAVE_SPATIAL Item *create_func_as_wkt(Item *a) { diff --git a/sql/item_create.h b/sql/item_create.h index 35db9be3c89..c76dc6b9ad7 100644 --- a/sql/item_create.h +++ b/sql/item_create.h @@ -102,7 +102,8 @@ Item *create_load_file(Item* a); Item *create_func_is_free_lock(Item* a); Item *create_func_is_used_lock(Item* a); Item *create_func_quote(Item* a); - +Item *create_func_xml_extractvalue(Item *a, Item *b); +Item *create_func_xml_update(Item *a, Item *b, Item *c); 
#ifdef HAVE_SPATIAL Item *create_func_geometry_from_text(Item *a); diff --git a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc new file mode 100644 index 00000000000..79a5c8e6445 --- /dev/null +++ b/sql/item_xmlfunc.cc @@ -0,0 +1,2572 @@ +/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#ifdef __GNUC__ +#pragma implementation +#endif + +#include "mysql_priv.h" +#include "my_xml.h" + + +/* + TODO: future development directions: + 1. add real constants for XPATH_NODESET_CMP and XPATH_NODESET + into enum Type in item.h. + 2. add nodeset_to_nodeset_comparator + 3. add lacking functions: + - name() + - last() + - lang() + - string() + - id() + - translate() + - local-name() + - starts-with() + - namespace-uri() + - substring-after() + - normalize-space() + - substring-before() + 4. 
add lacking axis: + - following-sibling + - following, + - preceding-sibling + - preceding +*/ + + +/* Structure to store a parsed XML tree */ +typedef struct my_xml_node_st +{ + uint level; /* level in XML tree, 0 means root node */ + enum my_xml_node_type type; /* node type: node, or attribute, or text */ + uint parent; /* link to the parent */ + const char *beg; /* beginning of the name or text */ + const char *end; /* end of the name or text */ + const char *tagend; /* where this tag ends */ +} MY_XML_NODE; + + +/* Lexical analyzer token */ +typedef struct my_xpath_lex_st +{ + int term; /* token type, see MY_XPATH_LEX_XXXXX below */ + const char *beg; /* beginning of the token */ + const char *end; /* end of the token */ +} MY_XPATH_LEX; + + +/* Structure to store nodesets */ +typedef struct my_xpath_flt_st +{ + uint num; /* absolute position in MY_XML_NODE array */ + uint pos; /* relative position in context */ +} MY_XPATH_FLT; + + +/* XPath function creator */ +typedef struct my_xpath_function_names_st +{ + const char *name; /* function name */ + size_t length; /* function name length */ + size_t minargs; /* min number of arguments */ + size_t maxargs; /* max number of arguments */ + Item *(*create)(struct my_xpath_st *xpath, Item **args, uint nargs); +} MY_XPATH_FUNC; + + +/* XPath query parser */ +typedef struct my_xpath_st +{ + int debug; + MY_XPATH_LEX query; /* Whole query */ + MY_XPATH_LEX lasttok; /* last scanned token */ + MY_XPATH_LEX prevtok; /* previous scanned token */ + int axis; /* last scanned axis */ + int extra; /* last scanned "extra", context dependent */ + MY_XPATH_FUNC *func; /* last scanned function creator */ + Item *item; /* current expression */ + Item *context; /* last scanned context */ + String *context_cache; /* last context provider */ + String *pxml; /* Parsed XML, an array of MY_XML_NODE */ + CHARSET_INFO *cs; /* character set/collation string comparison */ +} MY_XPATH; + + +/* Dynamic array of MY_XPATH_FLT */ +class 
XPathFilter :public String +{ +public: + XPathFilter() :String() {} + inline bool append_element(MY_XPATH_FLT *flt) + { + String *str= this; + return str->append((const char*)flt, (uint32) sizeof(MY_XPATH_FLT)); + } + inline bool append_element(uint32 num, uint32 pos) + { + MY_XPATH_FLT add; + add.num= num; + add.pos= pos; + return append_element(&add); + } + inline MY_XPATH_FLT *element(uint i) + { + return (MY_XPATH_FLT*) (ptr() + i * sizeof(MY_XPATH_FLT)); + } + inline uint32 numelements() + { + return length() / sizeof(MY_XPATH_FLT); + } +}; + + +/* + Common features of the functions returning a node set. +*/ +class Item_nodeset_func :public Item_str_func +{ +protected: + String tmp_value, tmp2_value; + MY_XPATH_FLT *fltbeg, *fltend; + MY_XML_NODE *nodebeg, *nodeend; + uint numnodes; +public: + String *pxml; + String context_cache; + Item_nodeset_func(String *pxml_arg) :Item_str_func(), pxml(pxml_arg) {} + Item_nodeset_func(Item *a, String *pxml_arg) + :Item_str_func(a), pxml(pxml_arg) {} + Item_nodeset_func(Item *a, Item *b, String *pxml_arg) + :Item_str_func(a, b), pxml(pxml_arg) {} + Item_nodeset_func(Item *a, Item *b, Item *c, String *pxml_arg) + :Item_str_func(a,b,c), pxml(pxml_arg) {} + void prepare_nodes() + { + nodebeg= (MY_XML_NODE*) pxml->ptr(); + nodeend= (MY_XML_NODE*) (pxml->ptr() + pxml->length()); + numnodes= nodeend - nodebeg; + } + void prepare(String *nodeset) + { + prepare_nodes(); + String *res= args[0]->val_nodeset(&tmp_value); + fltbeg= (MY_XPATH_FLT*) res->ptr(); + fltend= (MY_XPATH_FLT*) (res->ptr() + res->length()); + nodeset->length(0); + } + enum Type type() const { return XPATH_NODESET; } + String *val_str(String *str) + { + prepare_nodes(); + String *res= val_nodeset(&tmp2_value); + fltbeg= (MY_XPATH_FLT*) res->ptr(); + fltend= (MY_XPATH_FLT*) (res->ptr() + res->length()); + String active; + active.alloc(numnodes); + bzero((char*) active.ptr(), numnodes); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE 
*node; + uint j; + for (j=0, node= nodebeg ; j < numnodes; j++, node++) + { + if (node->type == MY_XML_NODE_TEXT && + node->parent == flt->num) + active[j]= 1; + } + } + + str->length(0); + str->set_charset(collation.collation); + for (uint i=0 ; i < numnodes; i++) + { + if(active[i]) + { + if (str->length()) + str->append(" ", 1, &my_charset_latin1); + str->append(nodebeg[i].beg, nodebeg[i].end - nodebeg[i].beg); + } + } + return str; + } + enum Item_result result_type () const { return STRING_RESULT; } + void fix_length_and_dec() { max_length= MAX_BLOB_WIDTH; } + const char *func_name() const { return "nodeset"; } +}; + + +/* Returns an XML root */ +class Item_nodeset_func_rootelement :public Item_nodeset_func +{ +public: + Item_nodeset_func_rootelement(String *pxml): Item_nodeset_func(pxml) {} + const char *func_name() const { return "xpath_rootelement"; } + String *val_nodeset(String *nodeset); +}; + + +/* Returns a Union of two node sets */ +class Item_nodeset_func_union :public Item_nodeset_func +{ +public: + Item_nodeset_func_union(Item *a, Item *b, String *pxml) + :Item_nodeset_func(a, b, pxml) {} + const char *func_name() const { return "xpath_union"; } + String *val_nodeset(String *nodeset); +}; + + +/* Makes one step towards the given axis */ +class Item_nodeset_func_axisbyname :public Item_nodeset_func +{ + const char *node_name; + uint node_namelen; +public: + Item_nodeset_func_axisbyname(Item *a, const char *n_arg, uint l_arg, + String *pxml): + Item_nodeset_func(a, pxml), node_name(n_arg), node_namelen(l_arg) { } + const char *func_name() const { return "xpath_axisbyname"; } + bool validname(MY_XML_NODE *n) + { + if (node_name[0] == '*') + return 1; + return (node_namelen == (uint) (n->end - n->beg)) && + !memcmp(node_name, n->beg, node_namelen); + } +}; + + +/* Returns children */ +class Item_nodeset_func_childbyname: public Item_nodeset_func_axisbyname +{ +public: + Item_nodeset_func_childbyname(Item *a, const char *n_arg, uint l_arg, + String 
*pxml): + Item_nodeset_func_axisbyname(a, n_arg, l_arg, pxml) {} + const char *func_name() const { return "xpath_childbyname"; } + String *val_nodeset(String *nodeset); +}; + + +/* Returns descendants */ +class Item_nodeset_func_descendantbyname: public Item_nodeset_func_axisbyname +{ + bool need_self; +public: + Item_nodeset_func_descendantbyname(Item *a, const char *n_arg, uint l_arg, + String *pxml, bool need_self_arg): + Item_nodeset_func_axisbyname(a, n_arg, l_arg, pxml), + need_self(need_self_arg) {} + const char *func_name() const { return "xpath_descendantbyname"; } + String *val_nodeset(String *nodeset); +}; + + +/* Returns ancestors */ +class Item_nodeset_func_ancestorbyname: public Item_nodeset_func_axisbyname +{ + bool need_self; +public: + Item_nodeset_func_ancestorbyname(Item *a, const char *n_arg, uint l_arg, + String *pxml, bool need_self_arg): + Item_nodeset_func_axisbyname(a, n_arg, l_arg, pxml), + need_self(need_self_arg) {} + const char *func_name() const { return "xpath_ancestorbyname"; } + String *val_nodeset(String *nodeset); +}; + + +/* Returns parents */ +class Item_nodeset_func_parentbyname: public Item_nodeset_func_axisbyname +{ +public: + Item_nodeset_func_parentbyname(Item *a, const char *n_arg, uint l_arg, + String *pxml): + Item_nodeset_func_axisbyname(a, n_arg, l_arg, pxml) {} + const char *func_name() const { return "xpath_parentbyname"; } + String *val_nodeset(String *nodeset); +}; + + +/* Returns attributes */ +class Item_nodeset_func_attributebyname: public Item_nodeset_func_axisbyname +{ +public: + Item_nodeset_func_attributebyname(Item *a, const char *n_arg, uint l_arg, + String *pxml): + Item_nodeset_func_axisbyname(a, n_arg, l_arg, pxml) {} + const char *func_name() const { return "xpath_attributebyname"; } + String *val_nodeset(String *nodeset); +}; + + +/* + Condition iterator: goes through all nodes in the current + context and checks a condition, returning those nodes + giving TRUE condition result. 
+*/ +class Item_nodeset_func_predicate :public Item_nodeset_func +{ +public: + Item_nodeset_func_predicate(Item *a, Item *b, String *pxml): + Item_nodeset_func(a, b, pxml) {} + const char *func_name() const { return "xpath_predicate"; } + String *val_nodeset(String *nodeset); +}; + + +/* Selects nodes with a given position in context */ +class Item_nodeset_func_elementbyindex :public Item_nodeset_func +{ +public: + Item_nodeset_func_elementbyindex(Item *a, Item *b, String *pxml): + Item_nodeset_func(a, b, pxml) { } + const char *func_name() const { return "xpath_elementbyindex"; } + String *val_nodeset(String *nodeset); +}; + + +/* + We need to distinguish a number from a boolean: + a[1] and a[true] are different things in XPath. +*/ +class Item_bool :public Item_int +{ +public: + Item_bool(int32 i): Item_int(i) {} + const char *func_name() const { return "xpath_bool"; } + bool is_bool_func() { return 1; } +}; + + +/* + Converts its argument into a boolean value. + * a number is true if it is non-zero + * a node-set is true if and only if it is non-empty + * a string is true if and only if its length is non-zero +*/ +class Item_xpath_cast_bool :public Item_int_func +{ + String *pxml; + String tmp_value; +public: + Item_xpath_cast_bool(Item *a, String *pxml_arg) + :Item_int_func(a), pxml(pxml_arg) {} + const char *func_name() const { return "xpath_cast_bool"; } + bool is_bool_func() { return 1; } + longlong val_int() + { + if (args[0]->type() == XPATH_NODESET) + { + String *flt= args[0]->val_nodeset(&tmp_value); + return flt->length() == sizeof(MY_XPATH_FLT) ? 1 : 0; + } + return args[0]->val_real() ? 
1 : 0; + } +}; + + +/* + Converts its argument into a number +*/ +class Item_xpath_cast_number :public Item_real_func +{ +public: + Item_xpath_cast_number(Item *a): Item_real_func(a) {} + const char *func_name() const { return "xpath_cast_number"; } + virtual double val_real() { return args[0]->val_real(); } +}; + + +/* + Context cache, for predicate +*/ +class Item_nodeset_context_cache :public Item_nodeset_func +{ +public: + String *string_cache; + Item_nodeset_context_cache(String *str_arg, String *pxml): + Item_nodeset_func(pxml), string_cache(str_arg) { } + String *val_nodeset(String *res) + { return string_cache; } + void fix_length_and_dec() { max_length= MAX_BLOB_WIDTH; } +}; + + +class Item_func_xpath_position :public Item_int_func +{ + String *pxml; + String tmp_value; +public: + Item_func_xpath_position(Item *a, String *p) + :Item_int_func(a), pxml(p) {} + const char *func_name() const { return "xpath_position"; } + void fix_length_and_dec() { max_length=10; } + longlong val_int() + { + String *flt= args[0]->val_nodeset(&tmp_value); + if (flt->length() == sizeof(MY_XPATH_FLT)) + return ((MY_XPATH_FLT*)flt->ptr())->pos + 1; + return 0; + } +}; + + +class Item_func_xpath_count :public Item_int_func +{ + String *pxml; + String tmp_value; +public: + Item_func_xpath_count(Item *a, String *p) + :Item_int_func(a), pxml(p) {} + const char *func_name() const { return "xpath_count"; } + void fix_length_and_dec() { max_length=10; } + longlong val_int() + { + String *res= args[0]->val_nodeset(&tmp_value); + return res->length() / sizeof(MY_XPATH_FLT); + } +}; + + +class Item_func_xpath_sum :public Item_real_func +{ + String *pxml; + String tmp_value; +public: + Item_func_xpath_sum(Item *a, String *p) + :Item_real_func(a), pxml(p) {} + + const char *func_name() const { return "xpath_sum"; } + double val_real() + { + double sum= 0; + String *res= args[0]->val_nodeset(&tmp_value); + MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) res->ptr(); + MY_XPATH_FLT *fltend= 
(MY_XPATH_FLT*) (res->ptr() + res->length()); + uint numnodes= pxml->length() / sizeof(MY_XML_NODE); + MY_XML_NODE *nodebeg= (MY_XML_NODE*) pxml->ptr(); + + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *self= &nodebeg[flt->num]; + for (uint j= flt->num + 1; j < numnodes; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->parent == flt->num) && + (node->type == MY_XML_NODE_TEXT)) + { + char *end; + int err; + double add= my_strntod(collation.collation, (char*) node->beg, + node->end - node->beg, &end, &err); + if (!err) + sum+= add; + } + } + } + return sum; + } +}; + + +class Item_nodeset_to_const_comparator :public Item_bool_func +{ + String *pxml; + String tmp_nodeset; +public: + Item_nodeset_to_const_comparator(Item *nodeset, Item *cmpfunc, String *p) + :Item_bool_func(nodeset,cmpfunc), pxml(p) {} + enum Type type() const { return XPATH_NODESET_CMP; }; + const char *func_name() const { return "xpath_nodeset_to_const_comparator"; } + bool is_bool_func() { return 1; } + + longlong val_int() + { + Item_func *comp= (Item_func*)args[1]; + Item_string *fake= (Item_string*)(comp->arguments()[1]); + String *res= args[0]->val_nodeset(&tmp_nodeset); + MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) res->ptr(); + MY_XPATH_FLT *fltend= (MY_XPATH_FLT*) (res->ptr() + res->length()); + MY_XML_NODE *nodebeg= (MY_XML_NODE*) pxml->ptr(); + uint numnodes= pxml->length() / sizeof(MY_XML_NODE); + + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *self= &nodebeg[flt->num]; + for (uint j= flt->num + 1; j < numnodes; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->parent == flt->num) && + (node->type == MY_XML_NODE_TEXT)) + { + fake->str_value.set(node->beg, node->end - node->beg, + collation.collation); + if (args[1]->val_int()) + return 1; + } + } + } + return 0; + } +}; + + +String *Item_nodeset_func_rootelement::val_nodeset(String *nodeset) +{ + 
nodeset->length(0); + ((XPathFilter*)nodeset)->append_element(0, 0); + return nodeset; +} + + +String * Item_nodeset_func_union::val_nodeset(String *nodeset) +{ + uint numnodes= pxml->length() / sizeof(MY_XML_NODE); + String set0, *s0= args[0]->val_nodeset(&set0); + String set1, *s1= args[1]->val_nodeset(&set1); + String both_str; + both_str.alloc(numnodes); + char *both= (char*) both_str.ptr(); + bzero((void*)both, numnodes); + uint pos= 0; + MY_XPATH_FLT *flt; + + fltbeg= (MY_XPATH_FLT*) s0->ptr(); + fltend= (MY_XPATH_FLT*) (s0->ptr() + s0->length()); + for (flt= fltbeg; flt < fltend; flt++) + both[flt->num]= 1; + + fltbeg= (MY_XPATH_FLT*) s1->ptr(); + fltend= (MY_XPATH_FLT*) (s1->ptr() + s1->length()); + for (flt= fltbeg; flt < fltend; flt++) + both[flt->num]= 1; + + nodeset->length(0); + for (uint i= 0, pos= 0; i < numnodes; i++) + { + if (both[i]) + ((XPathFilter*)nodeset)->append_element(i, pos++); + } + return nodeset; +} + + +String *Item_nodeset_func_childbyname::val_nodeset(String *nodeset) +{ + prepare(nodeset); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *self= &nodebeg[flt->num]; + for (uint pos= 0, j= flt->num + 1 ; j < numnodes; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->parent == flt->num) && + (node->type == MY_XML_NODE_TAG) && + validname(node)) + ((XPathFilter*)nodeset)->append_element(j, pos++); + } + } + return nodeset; +} + + +String *Item_nodeset_func_descendantbyname::val_nodeset(String *nodeset) +{ + prepare(nodeset); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + uint pos= 0; + MY_XML_NODE *self= &nodebeg[flt->num]; + if (need_self && validname(self)) + ((XPathFilter*)nodeset)->append_element(flt->num,pos++); + for (uint j= flt->num + 1 ; j < numnodes ; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->type == MY_XML_NODE_TAG) && validname(node)) + 
((XPathFilter*)nodeset)->append_element(j,pos++); + } + } + return nodeset; +} + + +String *Item_nodeset_func_ancestorbyname::val_nodeset(String *nodeset) +{ + char *active; + String active_str; + prepare(nodeset); + active_str.alloc(numnodes); + active= (char*) active_str.ptr(); + bzero((void*)active, numnodes); + uint pos= 0; /* number of distinct nodes marked in active[] */ + + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + /* + Go to the root and add all nodes on the way. + Don't add the root if context is the root itself. + Several context nodes can reach the same node, so a node + is counted only the first time it is marked: "pos" must be + equal to the number of marked nodes, because the loop below + hands out the positions pos-1 ... 0. + */ + MY_XML_NODE *self= &nodebeg[flt->num]; + if (need_self && validname(self) && !active[flt->num]) + { + active[flt->num]= 1; + pos++; + } + + for (uint j= self->parent; nodebeg[j].parent != j; j= nodebeg[j].parent) + { + if (flt->num && validname(&nodebeg[j]) && !active[j]) + { + active[j]= 1; + pos++; + } + } + } + + /* Emit the marked nodes in array order, positions counting down to 0 */ + for (uint j= 0; j < numnodes ; j++) + { + if (active[j]) + ((XPathFilter*)nodeset)->append_element(j, --pos); + } + return nodeset; +} + + +String *Item_nodeset_func_parentbyname::val_nodeset(String *nodeset) +{ + char *active; + String active_str; + prepare(nodeset); + active_str.alloc(numnodes); + active= (char*) active_str.ptr(); + bzero((void*)active, numnodes); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + uint j= nodebeg[flt->num].parent; + if (flt->num && validname(&nodebeg[j])) + active[j]= 1; + } + for (uint j= 0, pos= 0; j < numnodes ; j++) + { + if (active[j]) + ((XPathFilter*)nodeset)->append_element(j, pos++); + } + return nodeset; +} + + +String *Item_nodeset_func_attributebyname::val_nodeset(String *nodeset) +{ + prepare(nodeset); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + MY_XML_NODE *self= &nodebeg[flt->num]; + for (uint pos=0, j= flt->num + 1 ; j < numnodes; j++) + { + MY_XML_NODE *node= &nodebeg[j]; + if (node->level <= self->level) + break; + if ((node->parent == flt->num) && + (node->type == MY_XML_NODE_ATTR) && + validname(node)) + ((XPathFilter*)nodeset)->append_element(j, pos++); + } + } + return nodeset; +} + + +String 
*Item_nodeset_func_predicate::val_nodeset(String *str) +{ + Item_nodeset_func *nodeset_func= (Item_nodeset_func*) args[0]; + Item_func *comp_func= (Item_func*)args[1]; + uint pos= 0; + prepare(str); + for (MY_XPATH_FLT *flt= fltbeg; flt < fltend; flt++) + { + nodeset_func->context_cache.length(0); + ((XPathFilter*)(&nodeset_func->context_cache))->append_element(flt->num, + flt->pos); + if (comp_func->val_int()) + ((XPathFilter*)str)->append_element(flt->num, pos++); + } + return str; +}; + + +String *Item_nodeset_func_elementbyindex::val_nodeset(String *nodeset) +{ + prepare(nodeset); + int index= args[1]->val_int() - 1; + if (index >= 0) + { + MY_XPATH_FLT *flt; + uint pos; + for (pos= 0, flt= fltbeg; flt < fltend; flt++) + { + if (flt->pos == (uint) index || args[1]->is_bool_func()) + ((XPathFilter*)nodeset)->append_element(flt->num, pos++); + } + } + return nodeset; +} + + +/* + If item is a node set, then casts it to boolean, + otherwise returns the item itself. +*/ +static Item* nodeset2bool(MY_XPATH *xpath, Item *item) +{ + if (item->type() == Item::XPATH_NODESET) + return new Item_xpath_cast_bool(item, xpath->pxml); + return item; +} + + +/* + XPath lexical tokens +*/ +#define MY_XPATH_LEX_DIGITS 'd' +#define MY_XPATH_LEX_IDENT 'i' +#define MY_XPATH_LEX_STRING 's' +#define MY_XPATH_LEX_SLASH '/' +#define MY_XPATH_LEX_LB '[' +#define MY_XPATH_LEX_RB ']' +#define MY_XPATH_LEX_LP '(' +#define MY_XPATH_LEX_RP ')' +#define MY_XPATH_LEX_EQ '=' +#define MY_XPATH_LEX_LESS '<' +#define MY_XPATH_LEX_GREATER '>' +#define MY_XPATH_LEX_AT '@' +#define MY_XPATH_LEX_COLON ':' +#define MY_XPATH_LEX_ASTERISK '*' +#define MY_XPATH_LEX_DOT '.' +#define MY_XPATH_LEX_VLINE '|' +#define MY_XPATH_LEX_MINUS '-' +#define MY_XPATH_LEX_PLUS '+' +#define MY_XPATH_LEX_EXCL '!' 
+#define MY_XPATH_LEX_COMMA ',' +#define MY_XPATH_LEX_DOLLAR '$' +#define MY_XPATH_LEX_ERROR 'A' +#define MY_XPATH_LEX_EOF 'B' +#define MY_XPATH_LEX_AND 'C' +#define MY_XPATH_LEX_OR 'D' +#define MY_XPATH_LEX_DIV 'E' +#define MY_XPATH_LEX_MOD 'F' +#define MY_XPATH_LEX_FUNC 'G' +#define MY_XPATH_LEX_NODETYPE 'H' +#define MY_XPATH_LEX_AXIS 'I' +#define MY_XPATH_LEX_LE 'J' +#define MY_XPATH_LEX_GE 'K' + + +/* + XPath axis type +*/ +#define MY_XPATH_AXIS_ANCESTOR 0 +#define MY_XPATH_AXIS_ANCESTOR_OR_SELF 1 +#define MY_XPATH_AXIS_ATTRIBUTE 2 +#define MY_XPATH_AXIS_CHILD 3 +#define MY_XPATH_AXIS_DESCENDANT 4 +#define MY_XPATH_AXIS_DESCENDANT_OR_SELF 5 +#define MY_XPATH_AXIS_FOLLOWING 6 +#define MY_XPATH_AXIS_FOLLOWING_SIBLING 7 +#define MY_XPATH_AXIS_NAMESPACE 8 +#define MY_XPATH_AXIS_PARENT 9 +#define MY_XPATH_AXIS_PRECEDING 10 +#define MY_XPATH_AXIS_PRECEDING_SIBLING 11 +#define MY_XPATH_AXIS_SELF 12 + + +/* + Create scalar comparator + + SYNOPSIS + Create a comparator function for scalar arguments, + for the given arguments and operation. + + RETURN + The newly created item, or 0 if the operation is unknown. +*/ +static Item *eq_func(int oper, Item *a, Item *b) +{ + switch (oper) + { + case '=': return new Item_func_eq(a, b); + case '!': return new Item_func_ne(a, b); + case MY_XPATH_LEX_GE: return new Item_func_ge(a, b); + case MY_XPATH_LEX_LE: return new Item_func_le(a, b); + case MY_XPATH_LEX_GREATER: return new Item_func_gt(a, b); + case MY_XPATH_LEX_LESS: return new Item_func_lt(a, b); + } + return 0; +} + + +/* + Create scalar comparator for exchanged operands + + SYNOPSIS + Create a comparator function for scalar arguments, + for the given arguments and the mirrored operation, i.e. the + operation that gives the same result after the two operands + have been exchanged, e.g. + + for ">=" the item "a <= b" is created, so that a caller + which needs "X >= Y" can pass the operands as (Y, X). + + "=" and "!=" are symmetric and are created as they are. + + RETURN + The newly created item, or 0 if the operation is unknown. 
+*/ +static Item *eq_func_reverse(int oper, Item *a, Item *b) +{ + switch (oper) + { + case '=': return new Item_func_eq(a, b); + case '!': return new Item_func_ne(a, b); + case MY_XPATH_LEX_GE: return new Item_func_le(a, b); + case MY_XPATH_LEX_LE: return new Item_func_ge(a, b); + case MY_XPATH_LEX_GREATER: return new Item_func_lt(a, b); + case MY_XPATH_LEX_LESS: return new Item_func_gt(a, b); + } + return 0; +} + + +/* + Create a comparator + + SYNOPSIS + Create a comparator for scalar or non-scalar arguments, + for the given arguments and operation. + + RETURN + The newly created item. + 0 when both arguments are node sets (not supported yet). +*/ +static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b) +{ + if (a->type() != Item::XPATH_NODESET && + b->type() != Item::XPATH_NODESET) + { + return eq_func(oper, a, b); // two scalar arguments + } + else if (a->type() == Item::XPATH_NODESET && + b->type() == Item::XPATH_NODESET) + { + return 0; // TODO: Comparison of two nodesets + } + else + { + /* + Compare a node set to a scalar value. + We just create a fake Item_string() argument, + which will be filled to the particular value + in a loop through all of the nodes in the node set. + + The fake item is always passed as the second operand, + because Item_nodeset_to_const_comparator fetches it + with arguments()[1]. The operands of the scalar + comparator are therefore always (scalar, node value). + */ + + Item *fake= new Item_string("", 0, xpath->cs); + Item_nodeset_func *nodeset; + Item *scalar, *comp; + if (a->type() == Item::XPATH_NODESET) + { + /* "nodeset OP scalar": operands are exchanged, mirror OP */ + nodeset= (Item_nodeset_func*) a; + scalar= b; + comp= eq_func_reverse(oper, scalar, fake); + } + else + { + /* "scalar OP nodeset": operands keep their order */ + nodeset= (Item_nodeset_func*) b; + scalar= a; + comp= eq_func(oper, scalar, fake); + } + return new Item_nodeset_to_const_comparator(nodeset, comp, xpath->pxml); + } +} + + +/* + Create a step + + SYNOPSIS + Create a step function for the given argument and axis. + + RETURN + The newly created item. 
+*/ +static Item* nametestfunc(MY_XPATH *xpath, + int type, Item *arg, const char *beg, uint len) +{ + DBUG_ASSERT(arg != 0); + DBUG_ASSERT(arg->type() == Item::XPATH_NODESET); + DBUG_ASSERT(beg != 0); + DBUG_ASSERT(len > 0); + + Item *res; + switch (type) + { + case MY_XPATH_AXIS_ANCESTOR: + res= new Item_nodeset_func_ancestorbyname(arg, beg, len, xpath->pxml, 0); + break; + case MY_XPATH_AXIS_ANCESTOR_OR_SELF: + res= new Item_nodeset_func_ancestorbyname(arg, beg, len, xpath->pxml, 1); + break; + case MY_XPATH_AXIS_PARENT: + res= new Item_nodeset_func_parentbyname(arg, beg, len, xpath->pxml); + break; + case MY_XPATH_AXIS_DESCENDANT: + res= new Item_nodeset_func_descendantbyname(arg, beg, len, xpath->pxml, 0); + break; + case MY_XPATH_AXIS_DESCENDANT_OR_SELF: + res= new Item_nodeset_func_descendantbyname(arg, beg, len, xpath->pxml, 1); + break; + case MY_XPATH_AXIS_ATTRIBUTE: + res= new Item_nodeset_func_attributebyname(arg, beg, len, xpath->pxml); + break; + default: + res= new Item_nodeset_func_childbyname(arg, beg, len, xpath->pxml); + } + return res; +} + + +/* + Tokens consisting of one character, for a faster lexical analyzer. +*/ +static char simpletok[128]= +{ + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +/* + ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
+ @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ + ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ DEL +*/ + 0,1,0,0,1,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0, + 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0 +}; + + +/* + XPath keywords +*/ +struct my_xpath_keyword_names_st +{ + int tok; + const char *name; + size_t length; + int extra; +}; + + +static struct my_xpath_keyword_names_st my_keyword_names[] = +{ + {MY_XPATH_LEX_AND , "and" , 3, 0 }, + {MY_XPATH_LEX_OR , "or" , 2, 0 }, + {MY_XPATH_LEX_DIV , "div" , 3, 0 }, + {MY_XPATH_LEX_MOD , "mod" , 3, 0 }, + + {MY_XPATH_LEX_NODETYPE, "comment" , 7, 0 }, + {MY_XPATH_LEX_NODETYPE, "text" , 4, 0 }, + {MY_XPATH_LEX_NODETYPE, "processing-instruction" , 22,0 }, + {MY_XPATH_LEX_NODETYPE, "node" , 4, 0 }, + + {MY_XPATH_LEX_AXIS,"ancestor" , 8,MY_XPATH_AXIS_ANCESTOR }, + {MY_XPATH_LEX_AXIS,"ancestor-or-self" ,16,MY_XPATH_AXIS_ANCESTOR_OR_SELF }, + {MY_XPATH_LEX_AXIS,"attribute" , 9,MY_XPATH_AXIS_ATTRIBUTE }, + {MY_XPATH_LEX_AXIS,"child" , 5,MY_XPATH_AXIS_CHILD }, + {MY_XPATH_LEX_AXIS,"descendant" ,10,MY_XPATH_AXIS_DESCENDANT }, + {MY_XPATH_LEX_AXIS,"descendant-or-self",18,MY_XPATH_AXIS_DESCENDANT_OR_SELF}, + {MY_XPATH_LEX_AXIS,"following" , 9,MY_XPATH_AXIS_FOLLOWING }, + {MY_XPATH_LEX_AXIS,"following-sibling" ,17,MY_XPATH_AXIS_FOLLOWING_SIBLING }, + {MY_XPATH_LEX_AXIS,"namespace" , 9,MY_XPATH_AXIS_NAMESPACE }, + {MY_XPATH_LEX_AXIS,"parent" , 6,MY_XPATH_AXIS_PARENT }, + {MY_XPATH_LEX_AXIS,"preceding" , 9,MY_XPATH_AXIS_PRECEDING }, + {MY_XPATH_LEX_AXIS,"preceding-sibling" ,17,MY_XPATH_AXIS_PRECEDING_SIBLING }, + {MY_XPATH_LEX_AXIS,"self" , 4,MY_XPATH_AXIS_SELF }, + + {0,NULL,0,0} +}; + + +/* + Lookup a keyword + + SYNOPSIS + Check that the last scanned identifier is a keyword. + + RETURN + - Token type, on lookup success. + - MY_XPATH_LEX_IDENT, on lookup failure. 
+*/ +static int my_xpath_keyword(MY_XPATH *x, const char *beg, const char *end) +{ + struct my_xpath_keyword_names_st *k; + size_t length= end-beg; + for (k= my_keyword_names; k->name; k++) + { + if (length == k->length && !strncasecmp(beg, k->name, length)) + { + x->extra= k->extra; + return k->tok; + } + } + return MY_XPATH_LEX_IDENT; +} + + +/* + Functions to create an item, a-la those in item_create.cc +*/ + +static Item *create_func_true(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_bool(1); +} + + +static Item *create_func_false(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_bool(0); +} + + +static Item *create_func_not(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_not(nodeset2bool(xpath, args[0])); +} + + +static Item *create_func_ceiling(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_ceiling(args[0]); +} + + +static Item *create_func_floor(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_floor(args[0]); +} + + +static Item *create_func_bool(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_xpath_cast_bool(args[0], xpath->pxml); +} + + +static Item *create_func_number(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_xpath_cast_number(args[0]); +} + + +static Item *create_func_round(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_round(args[0], new Item_int((char*)"0",0,1),0); +} + + +static Item *create_func_last(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_xpath_count(xpath->context, xpath->pxml); +} + + +static Item *create_func_position(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_func_xpath_position(xpath->context, xpath->pxml); +} + + +static Item *create_func_contains(MY_XPATH *xpath, Item **args, uint nargs) +{ + return new Item_xpath_cast_bool(new Item_func_locate(args[0], args[1]), + xpath->pxml); +} + + +static Item *create_func_concat(MY_XPATH *xpath, Item **args, 
uint nargs) +{ + return new Item_func_concat(args[0], args[1]); +} + + +static Item *create_func_substr(MY_XPATH *xpath, Item **args, uint nargs) +{ + if (nargs == 2) + return new Item_func_substr(args[0], args[1]); + else + return new Item_func_substr(args[0], args[1], args[2]); +} + + +static Item *create_func_count(MY_XPATH *xpath, Item **args, uint nargs) +{ + if (args[0]->type() != Item::XPATH_NODESET) + return 0; + return new Item_func_xpath_count(args[0], xpath->pxml); +} + + +static Item *create_func_sum(MY_XPATH *xpath, Item **args, uint nargs) +{ + if (args[0]->type() != Item::XPATH_NODESET) + return 0; + return new Item_func_xpath_sum(args[0], xpath->pxml); +} + + +/* + Functions names. Separate lists for names with + lengths 3,4,5 and 6 for faster lookups. +*/ +static MY_XPATH_FUNC my_func_names3[]= +{ + {"sum", 3, 1 , 1 , create_func_sum}, + {"not", 3, 1 , 1 , create_func_not}, + {0 , 0, 0 , 0, 0} +}; + + +static MY_XPATH_FUNC my_func_names4[]= +{ + {"last", 4, 0, 0, create_func_last}, + {"true", 4, 0, 0, create_func_true}, + {"name", 4, 0, 1, 0}, + {"lang", 4, 1, 1, 0}, + {0 , 0, 0, 0, 0} +}; + + +static MY_XPATH_FUNC my_func_names5[]= +{ + {"count", 5, 1, 1, create_func_count}, + {"false", 5, 0, 0, create_func_false}, + {"floor", 5, 1, 1, create_func_floor}, + {"round", 5, 1, 1, create_func_round}, + {0 , 0, 0, 0, 0} +}; + + +static MY_XPATH_FUNC my_func_names6[]= +{ + {"concat", 6, 2, 255, create_func_concat}, + {"number", 6, 0, 1 , create_func_number}, + {"string", 6, 0, 1 , 0}, + {0 , 0, 0, 0 , 0} +}; + + +/* Other functions, with name longer than 6, all together */ +static MY_XPATH_FUNC my_func_names[] = +{ + {"id" , 2 , 1 , 1 , 0}, + {"boolean" , 7 , 1 , 1 , create_func_bool}, + {"ceiling" , 7 , 1 , 1 , create_func_ceiling}, + {"position" , 8 , 0 , 0 , create_func_position}, + {"contains" , 8 , 2 , 2 , create_func_contains}, + {"substring" , 9 , 2 , 3 , create_func_substr}, + {"translate" , 9 , 3 , 3 , 0}, + + {"local-name" , 10 , 0 , 1 , 0}, + 
{"starts-with" , 11 , 2 , 2 , 0}, + {"namespace-uri" , 13 , 0 , 1 , 0}, + {"substring-after" , 15 , 2 , 2 , 0}, + {"normalize-space" , 15 , 0 , 1 , 0}, + {"substring-before" , 16 , 2 , 2 , 0}, + + {NULL,0,0,0,0} +}; + + +/* + Lookup a function by name + + SYNOPSIS + Lookup a function by its name. Only functions which have + a creator (a non-zero "create" member) can be found. + + RETURN + Pointer to a MY_XPATH_FUNC variable on success. + 0 - on failure. + +*/ +MY_XPATH_FUNC * +my_xpath_function(const char *beg, const char *end) +{ + MY_XPATH_FUNC *k, *function_names; + uint length= end-beg; + switch (length) + { + case 1: return 0; + case 3: function_names= my_func_names3; break; + case 4: function_names= my_func_names4; break; + case 5: function_names= my_func_names5; break; + case 6: function_names= my_func_names6; break; + default: function_names= my_func_names; + } + for (k= function_names; k->name; k++) + if (k->create && length == k->length && !strncasecmp(beg, k->name, length)) + return k; + return NULL; +} + + +/* Initialize a lex analyzer token */ +static void +my_xpath_lex_init(MY_XPATH_LEX *lex, + const char *str, const char *strend) +{ + lex->beg= str; + lex->end= strend; +} + + +/* Initialize an XPath query parser */ +static void +my_xpath_init(MY_XPATH *xpath) +{ + bzero((void*)xpath, sizeof(xpath[0])); +} + + +/* + Some ctype-alike helper functions. Note, we cannot + reuse cs->ident_map[], because in Xpath, unlike in SQL, + dash character is a valid identifier part. +*/ + +/* Returns non-zero if "c" can start an identifier: a letter or '_' */ +static int +my_xident_beg(int c) +{ + return (((c) >= 'a' && (c) <= 'z') || + ((c) >= 'A' && (c) <= 'Z') || + ((c) == '_')); +} + + +/* + Returns non-zero if "c" can continue an identifier: + a letter, a digit, '-' or '_'. Underscore has to be accepted here + as well as in my_xident_beg(), otherwise a name like "a_b" + is cut into two tokens, "a" and "_b". +*/ +static int +my_xident_body(int c) +{ + return (((c) >= 'a' && (c) <= 'z') || + ((c) >= 'A' && (c) <= 'Z') || + ((c) >= '0' && (c) <= '9') || + ((c) == '-') || + ((c) == '_')); +} + + +/* Returns non-zero if "c" is a decimal digit */ +static int +my_xdigit(int c) +{ + return ((c) >= '0' && (c) <= '9'); +} + + +/* + Scan the next token + + SYNOPSIS + Scan the next token from the input. + lex->term is set to the scanned token type. 
+ lex->beg and lex->end are set to the beginnig + and to the end of the token. + RETURN + N/A +*/ +static void +my_xpath_lex_scan(MY_XPATH *xpath, + MY_XPATH_LEX *lex, const char *beg, const char *end) +{ + int ch; + for ( ; beg < end && *beg == ' ' ; beg++); // skip leading spaces + lex->beg= beg; + + if (beg >= end) + { + lex->end= beg; + lex->term= MY_XPATH_LEX_EOF; // end of line reached + return; + } + ch= *beg++; + + if (ch > 0 && ch < 128 && simpletok[ch]) + { + // a token consisting of one character found + lex->end= beg; + lex->term= ch; + return; + } + + if (my_xident_beg(ch)) // ident, or a function call, or a keyword + { + // scan until the end of the identifier + for ( ; beg < end && my_xident_body(*beg); beg++); + lex->end= beg; + + // check if a function call + if (*beg == '(' && (xpath->func= my_xpath_function(lex->beg, beg))) + { + lex->term= MY_XPATH_LEX_FUNC; + return; + } + + // check if a keyword + lex->term= my_xpath_keyword(xpath, lex->beg, beg); + return; + } + + if (my_xdigit(ch)) // a sequence of digits + { + for ( ; beg < end && my_xdigit(*beg) ; beg++); + lex->end= beg; + lex->term= MY_XPATH_LEX_DIGITS; + return; + } + + if (ch == '"' || ch == '\'') // a string: either '...' or "..." + { + for ( ; beg < end && *beg != ch ; beg++); + if (beg < end) + { + lex->end= beg+1; + lex->term= MY_XPATH_LEX_STRING; + return; + } + else + { + // unexpected end-of-line, without closing quot sign + lex->end= end; + lex->term= MY_XPATH_LEX_ERROR; + return; + } + } + + lex->end= beg; + lex->term= MY_XPATH_LEX_ERROR; // unknown character + return; +} + + +/* + Scan the given token + + SYNOPSYS + Scan the given token and rotate lasttok to prevtok on success. 
+ + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse_term(MY_XPATH *xpath, int term) +{ + if (xpath->lasttok.term == term) + { + xpath->prevtok= xpath->lasttok; + my_xpath_lex_scan(xpath, &xpath->lasttok, + xpath->lasttok.end, xpath->query.end); + return 1; + } + return 0; +} + + +/* + Scan AxisName + + SYNOPSYS + Scan an axis name and store the scanned axis type into xpath->axis. + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AxisName(MY_XPATH *xpath) +{ + int rc= my_xpath_parse_term(xpath, MY_XPATH_LEX_AXIS); + xpath->axis= xpath->extra; + return rc; +} + + +/********************************************* +** Grammar rules, according to http://www.w3.org/TR/xpath +** Implemented using recursive descendant method. +** All the following grammar processing functions accept +** a signle "xpath" argument and return 1 on success and 0 on error. +** They also modify "xpath" argument by creating new items. +*/ + +/* [9] PredicateExpr ::= Expr */ +#define my_xpath_parse_PredicateExpr(x) my_xpath_parse_Expr((x)) + +/* [14] Expr ::= OrExpr */ +#define my_xpath_parse_Expr(x) my_xpath_parse_OrExpr((x)) + +static int my_xpath_parse_LocationPath(MY_XPATH *xpath); +static int my_xpath_parse_AbsoluteLocationPath(MY_XPATH *xpath); +static int my_xpath_parse_RelativeLocationPath(MY_XPATH *xpath); +static int my_xpath_parse_AbbreviatedAbsoluteLocationPath(MY_XPATH *xpath); +static int my_xpath_parse_AbbreviatedStep(MY_XPATH *xpath); +static int my_xpath_parse_Step(MY_XPATH *xpath); +static int my_xpath_parse_AxisSpecifier(MY_XPATH *xpath); +static int my_xpath_parse_NodeTest(MY_XPATH *xpath); +static int my_xpath_parse_AbbreviatedAxisSpecifier(MY_XPATH *xpath); +static int my_xpath_parse_NameTest(MY_XPATH *xpath); +static int my_xpath_parse_FunctionCall(MY_XPATH *xpath); +static int my_xpath_parse_Number(MY_XPATH *xpath); +static int my_xpath_parse_FilterExpr(MY_XPATH *xpath); +static int my_xpath_parse_PathExpr(MY_XPATH *xpath); +static int 
my_xpath_parse_OrExpr(MY_XPATH *xpath); +static int my_xpath_parse_UnaryExpr(MY_XPATH *xpath); +static int my_xpath_parse_MultiplicativeExpr(MY_XPATH *xpath); +static int my_xpath_parse_AdditiveExpr(MY_XPATH *xpath); +static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath); +static int my_xpath_parse_AndExpr(MY_XPATH *xpath); +static int my_xpath_parse_EqualityExpr(MY_XPATH *xpath); +static int my_xpath_parse_VariableReference(MY_XPATH *xpath); +static int my_xpath_parse_slash_opt_slash(MY_XPATH *xpath); + + +/* + Scan LocationPath + + SYNOPSYS + + [1] LocationPath ::= RelativeLocationPath + | AbsoluteLocationPath + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_LocationPath(MY_XPATH *xpath) +{ + Item *context= xpath->context; + + int rc= my_xpath_parse_RelativeLocationPath(xpath) || + my_xpath_parse_AbsoluteLocationPath(xpath); + + xpath->item= xpath->context; + xpath->context= context; + return rc; +} + + +/* + Scan Absolute Location Path + + SYNOPSYS + + [2] AbsoluteLocationPath ::= '/' RelativeLocationPath? + | AbbreviatedAbsoluteLocationPath + [10] AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath + + We combine these two rules into one rule for better performance: + + [2,10] AbsoluteLocationPath ::= '/' RelativeLocationPath? 
+ | '//' RelativeLocationPath + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AbsoluteLocationPath(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + return 0; + + xpath->context= new Item_nodeset_func_rootelement(xpath->pxml); + + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + { + xpath->context= new Item_nodeset_func_descendantbyname(xpath->context, + "*", 1, + xpath->pxml, 1); + return my_xpath_parse_RelativeLocationPath(xpath); + } + + if (my_xpath_parse_RelativeLocationPath(xpath)) + return 1; + + return 1; +} + + +/* + Scan Relative Location Path + + SYNOPSYS + + For better performance we combine these two rules + + [3] RelativeLocationPath ::= Step + | RelativeLocationPath '/' Step + | AbbreviatedRelativeLocationPath + [11] AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step + + + Into this one: + + [3-11] RelativeLocationPath ::= Step + | RelativeLocationPath '/' Step + | RelativeLocationPath '//' Step + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_RelativeLocationPath(MY_XPATH *xpath) +{ + if (!my_xpath_parse_Step(xpath)) + return 0; + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + { + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + xpath->context= new Item_nodeset_func_descendantbyname(xpath->context, + "*", 1, + xpath->pxml, 1); + if (!my_xpath_parse_Step(xpath)) + return 0; + } + return 1; +} + + +/* + Scan non-abbreviated or abbreviated Step + + SYNOPSYS + + [4] Step ::= AxisSpecifier NodeTest Predicate* + | AbbreviatedStep + [8] Predicate ::= '[' PredicateExpr ']' + + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse_AxisSpecifier_NodeTest_opt_Predicate_list(MY_XPATH *xpath) +{ + if (!my_xpath_parse_AxisSpecifier(xpath)) + return 0; + + if (!my_xpath_parse_NodeTest(xpath)) + return 0; + + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_LB)) + { + Item *prev_context= xpath->context; + String *context_cache; + context_cache= 
&((Item_nodeset_func*)xpath->context)->context_cache; + xpath->context= new Item_nodeset_context_cache(context_cache, xpath->pxml); + xpath->context_cache= context_cache; + + if(!my_xpath_parse_PredicateExpr(xpath)) + return 0; + + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RB)) + return 0; + + xpath->item= nodeset2bool(xpath, xpath->item); + + if (xpath->item->is_bool_func()) + { + xpath->context= new Item_nodeset_func_predicate(prev_context, + xpath->item, + xpath->pxml); + } + else + { + xpath->context= new Item_nodeset_func_elementbyindex(prev_context, + xpath->item, + xpath->pxml); + } + } + return 1; +} + + +static int my_xpath_parse_Step(MY_XPATH *xpath) +{ + return + my_xpath_parse_AxisSpecifier_NodeTest_opt_Predicate_list(xpath) || + my_xpath_parse_AbbreviatedStep(xpath); +} + + +/* + Scan Abbreviated Axis Specifier + + SYNOPSYS + [5] AxisSpecifier ::= AxisName '::' + | AbbreviatedAxisSpecifier + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AbbreviatedAxisSpecifier(MY_XPATH *xpath) +{ + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_AT)) + xpath->axis= MY_XPATH_AXIS_ATTRIBUTE; + else + xpath->axis= MY_XPATH_AXIS_CHILD; + return 1; +} + + +/* + Scan non-abbreviated axis specifier + + SYNOPSYS + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AxisName_colon_colon(MY_XPATH *xpath) +{ + return my_xpath_parse_AxisName(xpath) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_COLON) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_COLON); +} + + +/* + Scan Abbreviated AxisSpecifier + + SYNOPSYS + [13] AbbreviatedAxisSpecifier ::= '@'? 
+ + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AxisSpecifier(MY_XPATH *xpath) +{ + return my_xpath_parse_AxisName_colon_colon(xpath) || + my_xpath_parse_AbbreviatedAxisSpecifier(xpath); +} + + +/* + Scan NodeType followed by parens + + SYNOPSYS + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_NodeTest_lp_rp(MY_XPATH *xpath) +{ + return my_xpath_parse_term(xpath, MY_XPATH_LEX_NODETYPE) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_LP) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_RP); +} + + +/* + Scan NodeTest + + SYNOPSYS + + [7] NodeTest ::= NameTest + | NodeType '(' ')' + | 'processing-instruction' '(' Literal ')' + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_NodeTest(MY_XPATH *xpath) +{ + return my_xpath_parse_NameTest(xpath) || + my_xpath_parse_NodeTest_lp_rp(xpath); +} + + +/* + Scan Abbreviated Step + + SYNOPSYS + + [12] AbbreviatedStep ::= '.' | '..' + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AbbreviatedStep(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_DOT)) + return 0; + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_DOT)) + xpath->context= new Item_nodeset_func_parentbyname(xpath->context, "*", 1, + xpath->pxml); + return 1; +} + + +/* + Scan Primary Expression + + SYNOPSYS + + [15] PrimaryExpr ::= VariableReference + | '(' Expr ')' + | Literal + | Number + | FunctionCall + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_lp_Expr_rp(MY_XPATH *xpath) +{ + return my_xpath_parse_term(xpath, MY_XPATH_LEX_LP) && + my_xpath_parse_Expr(xpath) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_RP); +} +static int my_xpath_parse_PrimaryExpr_literal(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_STRING)) + return 0; + xpath->item= new Item_string(xpath->prevtok.beg + 1, + xpath->prevtok.end - xpath->prevtok.beg - 2, + xpath->cs); + return 1; +} +static int my_xpath_parse_PrimaryExpr(MY_XPATH *xpath) +{ + return + 
my_xpath_parse_lp_Expr_rp(xpath) || + my_xpath_parse_VariableReference(xpath) || + my_xpath_parse_PrimaryExpr_literal(xpath) || + my_xpath_parse_Number(xpath) || + my_xpath_parse_FunctionCall(xpath); +} + + +/* + Scan Function Call + + SYNOPSYS + [16] FunctionCall ::= FunctionName '(' ( Argument ( ',' Argument )* )? ')' + [17] Argument ::= Expr + + RETURN + 1 - success + 0 - failure + +*/ +static int my_xpath_parse_FunctionCall(MY_XPATH *xpath) +{ + Item *args[256]; + uint nargs; + + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_FUNC)) + return 0; + + MY_XPATH_FUNC *func= xpath->func; + + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_LP)) + return 0; + + for (nargs= 0 ; nargs < func->maxargs; ) + { + if (!my_xpath_parse_Expr(xpath)) + return 0; + args[nargs++]= xpath->item; + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_COMMA)) + { + if (nargs < func->minargs) + return 0; + else + break; + } + } + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RP)) + return 0; + + return ((xpath->item= func->create(xpath, args, nargs))) ? 
1 : 0; +} + + +/* + Scan Union Expression + + SYNOPSYS + [18] UnionExpr ::= PathExpr + | UnionExpr '|' PathExpr + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_UnionExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_PathExpr(xpath)) + return 0; + + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_VLINE)) + { + Item *prev= xpath->item; + if (prev->type() != Item::XPATH_NODESET) + return 0; + + if (!my_xpath_parse_PathExpr(xpath) + || xpath->item->type() != Item::XPATH_NODESET) + return 0; + xpath->item= new Item_nodeset_func_union(prev, xpath->item, xpath->pxml); + } + return 1; +} + + +/* + Scan Path Expression + + SYNOPSYS + + [19] PathExpr ::= LocationPath + | FilterExpr + | FilterExpr '/' RelativeLocationPath + | FilterExpr '//' RelativeLocationPath + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(MY_XPATH *xpath) +{ + if (!my_xpath_parse_FilterExpr(xpath)) + return 0; + + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH)) + return 1; + + my_xpath_parse_term(xpath, MY_XPATH_LEX_SLASH); + return my_xpath_parse_RelativeLocationPath(xpath); +} +static int my_xpath_parse_PathExpr(MY_XPATH *xpath) +{ + return my_xpath_parse_LocationPath(xpath) || + my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(xpath); +} + + + +/* + Scan Filter Expression + + SYNOPSYS + [20] FilterExpr ::= PrimaryExpr + | FilterExpr Predicate + + or in other words: + + [20] FilterExpr ::= PrimaryExpr Predicate* + + RETURN + 1 - success + 0 - failure + +*/ +static int my_xpath_parse_FilterExpr(MY_XPATH *xpath) +{ + return my_xpath_parse_PrimaryExpr(xpath); +} + + +/* + Scan Or Expression + + SYNOPSYS + [21] OrExpr ::= AndExpr + | OrExpr 'or' AndExpr + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_OrExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_AndExpr(xpath)) + return 0; + + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_OR)) + { + Item *prev= xpath->item; + if 
(!my_xpath_parse_AndExpr(xpath)) + return 0; + xpath->item= new Item_cond_or(nodeset2bool(xpath, prev), + nodeset2bool(xpath, xpath->item)); + } + return 1; +} + + +/* + Scan And Expression + + SYNOPSYS + [22] AndExpr ::= EqualityExpr + | AndExpr 'and' EqualityExpr + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AndExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_EqualityExpr(xpath)) + return 0; + + while (my_xpath_parse_term(xpath, MY_XPATH_LEX_AND)) + { + Item *prev= xpath->item; + if (!my_xpath_parse_EqualityExpr(xpath)) + return 0; + + xpath->item= new Item_cond_and(nodeset2bool(xpath,prev), + nodeset2bool(xpath,xpath->item)); + } + return 1; +} + + +/* + Scan Equality Expression + + SYNOPSYS + [23] EqualityExpr ::= RelationalExpr + | EqualityExpr '=' RelationalExpr + | EqualityExpr '!=' RelationalExpr + or in other words: + + [23] EqualityExpr ::= RelationalExpr ( EqualityOperator EqualityExpr )* + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_ne(MY_XPATH *xpath) +{ + return my_xpath_parse_term(xpath, MY_XPATH_LEX_EXCL) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_EQ); +} +static int my_xpath_parse_EqualityOperator(MY_XPATH *xpath) +{ + if (my_xpath_parse_ne(xpath)) + { + xpath->extra= '!'; + return 1; + } + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_EQ)) + { + xpath->extra= '='; + return 1; + } + return 0; +} +static int my_xpath_parse_EqualityExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_RelationalExpr(xpath)) + return 0; + while (my_xpath_parse_EqualityOperator(xpath)) + { + Item *prev= xpath->item; + int oper= xpath->extra; + if (!my_xpath_parse_RelationalExpr(xpath)) + return 0; + + if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item))) + return 0; + } + return 1; +} + + +/* + Scan Relational Expression + + SYNOPSYS + + [24] RelationalExpr ::= AdditiveExpr + | RelationalExpr '<' AdditiveExpr + | RelationalExpr '>' AdditiveExpr + | RelationalExpr '<=' AdditiveExpr + | RelationalExpr '>=' 
AdditiveExpr + or in other words: + + [24] RelationalExpr ::= AdditiveExpr (RelationalOperator RelationalExpr)* + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_RelationalOperator(MY_XPATH *xpath) +{ + if (my_xpath_parse_term(xpath, MY_XPATH_LEX_LESS)) + { + xpath->extra= my_xpath_parse_term(xpath, MY_XPATH_LEX_EQ) ? + MY_XPATH_LEX_LE : MY_XPATH_LEX_LESS; + return 1; + } + else if (my_xpath_parse_term(xpath, MY_XPATH_LEX_GREATER)) + { + xpath->extra= my_xpath_parse_term(xpath, MY_XPATH_LEX_EQ) ? + MY_XPATH_LEX_GE : MY_XPATH_LEX_GREATER; + return 1; + } + return 0; +} +static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_AdditiveExpr(xpath)) + return 0; + while (my_xpath_parse_RelationalOperator(xpath)) + { + Item *prev= xpath->item; + int oper= xpath->extra; + + if (!my_xpath_parse_AdditiveExpr(xpath)) + return 0; + + if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item))) + return 0; + } + return 1; +} + + +/* + Scan Additive Expression + + SYNOPSYS + + [25] AdditiveExpr ::= MultiplicativeExpr + | AdditiveExpr '+' MultiplicativeExpr + | AdditiveExpr '-' MultiplicativeExpr + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_AdditiveOperator(MY_XPATH *xpath) +{ + return my_xpath_parse_term(xpath, MY_XPATH_LEX_PLUS) || + my_xpath_parse_term(xpath, MY_XPATH_LEX_MINUS); +} +static int my_xpath_parse_AdditiveExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_MultiplicativeExpr(xpath)) + return 0; + + while (my_xpath_parse_AdditiveOperator(xpath)) + { + int oper= xpath->prevtok.term; + Item *prev= xpath->item; + if (!my_xpath_parse_MultiplicativeExpr(xpath)) + return 0; + + if (oper == MY_XPATH_LEX_PLUS) + xpath->item= new Item_func_plus(prev, xpath->item); + else + xpath->item= new Item_func_minus(prev, xpath->item); + }; + return 1; +} + + +/* + Scan Multiplicative Expression + + SYNOPSYS + + [26] MultiplicativeExpr ::= UnaryExpr + | MultiplicativeExpr MultiplyOperator UnaryExpr + | 
MultiplicativeExpr 'div' UnaryExpr + | MultiplicativeExpr 'mod' UnaryExpr + or in other words: + + [26] MultiplicativeExpr ::= UnaryExpr (MulOper MultiplicativeExpr)* + + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_MultiplicativeOperator(MY_XPATH *xpath) +{ + return + my_xpath_parse_term(xpath, MY_XPATH_LEX_ASTERISK) || + my_xpath_parse_term(xpath, MY_XPATH_LEX_DIV) || + my_xpath_parse_term(xpath, MY_XPATH_LEX_MOD); +} +static int my_xpath_parse_MultiplicativeExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_UnaryExpr(xpath)) + return 0; + + while (my_xpath_parse_MultiplicativeOperator(xpath)) + { + int oper= xpath->prevtok.term; + Item *prev= xpath->item; + if (!my_xpath_parse_UnaryExpr(xpath)) + return 0; + switch (oper) + { + case MY_XPATH_LEX_ASTERISK: + xpath->item= new Item_func_mul(prev, xpath->item); + break; + case MY_XPATH_LEX_DIV: + xpath->item= new Item_func_int_div(prev, xpath->item); + break; + case MY_XPATH_LEX_MOD: + xpath->item= new Item_func_mod(prev, xpath->item); + break; + } + } + return 1; +} + + +/* + Scan Unary Expression + + SYNOPSYS + + [27] UnaryExpr ::= UnionExpr + | '-' UnaryExpr + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_UnaryExpr(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_MINUS)) + return my_xpath_parse_UnionExpr(xpath); + if (!my_xpath_parse_UnaryExpr(xpath)) + return 0; + xpath->item= new Item_func_neg(xpath->item); + return 1; +} + + +/* + Scan Number + + SYNOPSYS + + [30] Number ::= Digits ('.' Digits?)? | '.' Digits) + + or in other words: + + [30] Number ::= Digits + | Digits '.' + | Digits '.' Digits + | '.' Digits + + Note: the last rule is not supported yet, + as it is in conflict with abbreviated step. + 1 + .123 does not work, + 1 + 0.123 does. + Perhaps it is better to move this code into lex analizer. 
+ + RETURN + 1 - success + 0 - failure +*/ +static int my_xpath_parse_Number(MY_XPATH *xpath) +{ + const char *beg; + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_DIGITS)) + return 0; + beg= xpath->prevtok.beg; + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_DOT)) + { + xpath->item= new Item_int(xpath->prevtok.beg, + xpath->prevtok.end - xpath->prevtok.beg); + return 1; + } + my_xpath_parse_term(xpath, MY_XPATH_LEX_DIGITS); + + xpath->item= new Item_float(beg, xpath->prevtok.end - beg); + return 1; +} + + +/* + Scan Variable reference + + SYNOPSYS + + [36] VariableReference ::= '$' QName + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse_VariableReference(MY_XPATH *xpath) +{ + return my_xpath_parse_term(xpath, MY_XPATH_LEX_DOLLAR) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_IDENT); +} + + +/* + Scan Name Test + + SYNOPSYS + + [37] NameTest ::= '*' + | NCName ':' '*' + | QName + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse_NodeTest_QName(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_IDENT)) + return 0; + DBUG_ASSERT(xpath->context); + uint len= xpath->prevtok.end - xpath->prevtok.beg; + xpath->context= nametestfunc(xpath, xpath->axis, xpath->context, + xpath->prevtok.beg, len); + return 1; +} +static int +my_xpath_parse_NodeTest_asterisk(MY_XPATH *xpath) +{ + if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_ASTERISK)) + return 0; + DBUG_ASSERT(xpath->context); + xpath->context= nametestfunc(xpath, xpath->axis, xpath->context, "*", 1); + return 1; +} +static int +my_xpath_parse_NameTest(MY_XPATH *xpath) +{ + return my_xpath_parse_NodeTest_asterisk(xpath) || + my_xpath_parse_NodeTest_QName(xpath); +} + + +/* + Scan an XPath expression + + SYNOPSYS + Scan xpath expression. + The expression is returned in xpath->expr. 
+ + RETURN + 1 - success + 0 - failure +*/ +static int +my_xpath_parse(MY_XPATH *xpath, const char *str, const char *strend) +{ + my_xpath_lex_init(&xpath->query, str, strend); + my_xpath_lex_init(&xpath->prevtok, str, strend); + my_xpath_lex_scan(xpath, &xpath->lasttok, str, strend); + + return + my_xpath_parse_Expr(xpath) && + my_xpath_parse_term(xpath, MY_XPATH_LEX_EOF); +} + + +void Item_xml_str_func::fix_length_and_dec() +{ + String *xp, tmp; + MY_XPATH xpath; + int rc; + + nodeset_func= 0; + + if (agg_arg_charsets(collation, args, arg_count, MY_COLL_CMP_CONV)) + return; + + if (collation.collation->mbminlen > 1) + { + /* UCS2 is not supported */ + my_printf_error(ER_UNKNOWN_ERROR, + "Character set '%s' is not supported by XPATH", + MYF(0), collation.collation->csname); + return; + } + + if (!args[1]->const_item()) + { + my_printf_error(ER_UNKNOWN_ERROR, + "Only constant XPATH queries are supported", MYF(0)); + return; + } + + xp= args[1]->val_str(&tmp); + my_xpath_init(&xpath); + xpath.cs= collation.collation; + xpath.debug= 0; + xpath.pxml= &pxml; + + rc= my_xpath_parse(&xpath, xp->ptr(), xp->ptr() + xp->length()); + + if (!rc) + { + char context[32]; + uint clen= xpath.query.end - xpath.lasttok.beg; + set_if_bigger(clen, sizeof(context) - 1); + memcpy(context, xpath.lasttok.beg, clen); + context[clen]= '\0'; + my_printf_error(ER_UNKNOWN_ERROR, "XPATH syntax error: '%s'", + MYF(0), context); + return; + } + + nodeset_func= xpath.item; + if (nodeset_func) + nodeset_func->fix_fields(current_thd, &nodeset_func); + max_length= MAX_BLOB_WIDTH; +} + + +#define MAX_LEVEL 256 +typedef struct +{ + uint level; + String *pxml; // parsed XML + uint pos[MAX_LEVEL]; // Tag position stack +} MY_XML_USER_DATA; + + +/* + Find the parent node + + SYNOPSYS + Find the parent node, i.e. a tag or attrubute node on the given level. 
+ + RETURN + 1 - success + 0 - failure +*/ +static uint xml_parent_tag(MY_XML_NODE *items, uint nitems, uint level) +{ + if (!nitems) + return 0; + + MY_XML_NODE *p, *last= &items[nitems-1]; + for (p= last; p >= items; p--) + { + if (p->level == level && + (p->type == MY_XML_NODE_TAG || + p->type == MY_XML_NODE_ATTR)) + { + return p - items; + } + } + return 0; +} + + +/* + Process tag beginning + + SYNOPSYS + + A call-back function executed when XML parser + is entering a tag or an attribue. + Appends the new node into data->pxml. + Increments data->level. + + RETURN + Currently only MY_XML_OK +*/ +static int xml_enter(MY_XML_PARSER *st,const char *attr, uint len) +{ + MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data; + MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr(); + uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE); + uint parent= xml_parent_tag(nodes, numnodes, data->level - 1); + MY_XML_NODE node; + + data->pos[data->level]= numnodes; + node.level= data->level++; + node.type= st->current_node_type; // TAG or ATTR + node.beg= attr; + node.end= attr + len; + node.parent= parent; + data->pxml->append((const char*) &node, sizeof(MY_XML_NODE)); + return MY_XML_OK; +} + + +/* + Process text node + + SYNOPSYS + + A call-back function executed when XML parser + is entering into a tag or an attribue textual value. + The value is appended into data->pxml. 
+ + RETURN + Currently only MY_XML_OK +*/ +static int xml_value(MY_XML_PARSER *st,const char *attr, uint len) +{ + MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data; + MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr(); + uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE); + uint parent= xml_parent_tag(nodes, numnodes, data->level - 1); + MY_XML_NODE node; + + node.level= data->level; + node.type= MY_XML_NODE_TEXT; + node.beg= attr; + node.end= attr + len; + node.parent= parent; + data->pxml->append((const char*) &node, sizeof(MY_XML_NODE)); + return MY_XML_OK; +} + + +/* + Leave a tag or an attribute + + SYNOPSYS + + A call-back function executed when XML parser + is leaving a tag or an attribue. + Decrements data->level. + + RETURN + Currently only MY_XML_OK +*/ +static int xml_leave(MY_XML_PARSER *st,const char *attr, uint len) +{ + MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data; + DBUG_ASSERT(data->level > 0); + data->level--; + + MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr(); + nodes+= data->pos[data->level]; + nodes->tagend= st->cur; + + return MY_XML_OK; +} + + +/* + Parse raw XML + + SYNOPSYS + + + RETURN + Currently pointer to parsed XML on success + 0 on parse error +*/ +String *Item_xml_str_func::parse_xml(String *raw_xml, String *parsed_xml_buf) +{ + MY_XML_PARSER p; + MY_XML_USER_DATA user_data; + int rc; + + parsed_xml_buf->length(0); + + /* Prepare XML parser */ + my_xml_parser_create(&p); + p.flags= MY_XML_FLAG_RELATIVE_NAMES | MY_XML_FLAG_SKIP_TEXT_NORMALIZATION; + user_data.level= 0; + user_data.pxml= parsed_xml_buf; + my_xml_set_enter_handler(&p, xml_enter); + my_xml_set_value_handler(&p, xml_value); + my_xml_set_leave_handler(&p, xml_leave); + my_xml_set_user_data(&p, (void*) &user_data); + + /* Add root node */ + p.current_node_type= MY_XML_NODE_TAG; + xml_enter(&p, raw_xml->ptr(), 0); + + /* Execute XML parser */ + rc= my_xml_parse(&p, raw_xml->ptr(), raw_xml->length()); + my_xml_parser_free(&p); + + return rc 
== MY_XML_OK ? parsed_xml_buf : 0; +} + + +String *Item_func_xml_extractvalue::val_str(String *str) +{ + String *res; + if (!nodeset_func || + !(res= args[0]->val_str(str)) || + !parse_xml(res, &pxml)) + { + null_value= 1; + return 0; + } + res= nodeset_func->val_str(&tmp_value); + return res; +} + + +String *Item_func_xml_update::val_str(String *str) +{ + String *res, *nodeset, *rep; + + if (!nodeset_func || + !(res= args[0]->val_str(str)) || + !(rep= args[2]->val_str(&tmp_value3)) || + !parse_xml(res, &pxml) || + !(nodeset= nodeset_func->val_nodeset(&tmp_value2))) + { + null_value= 1; + return 0; + } + + MY_XML_NODE *nodebeg= (MY_XML_NODE*) pxml.ptr(); + MY_XML_NODE *nodeend= (MY_XML_NODE*) pxml.ptr() + pxml.length(); + MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) nodeset->ptr(); + MY_XPATH_FLT *fltend= (MY_XPATH_FLT*) (nodeset->ptr() + nodeset->length()); + + /* Allow replacing of one tag only */ + if (fltend - fltbeg != 1) + { + /* TODO: perhaps add a warning that more than one tag selected */ + return res; + } + + nodebeg+= fltbeg->num; + + tmp_value.length(0); + tmp_value.set_charset(collation.collation); + uint offs= nodebeg->type == MY_XML_NODE_TAG ? 1 : 0; + tmp_value.append(res->ptr(), nodebeg->beg - res->ptr() - offs); + tmp_value.append(rep->ptr(), rep->length()); + const char *end= nodebeg->tagend + offs; + tmp_value.append(end, res->ptr() + res->length() - end); + return &tmp_value; +} diff --git a/sql/item_xmlfunc.h b/sql/item_xmlfunc.h new file mode 100644 index 00000000000..bc47e9c5bb1 --- /dev/null +++ b/sql/item_xmlfunc.h @@ -0,0 +1,56 @@ +/* Copyright (C) 2000-2005 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +/* This file defines all XML functions */ + + +#ifdef __GNUC__ +#pragma interface /* gcc class implementation */ +#endif + + +class Item_xml_str_func: public Item_str_func +{ +protected: + String tmp_value, pxml; + Item *nodeset_func; +public: + Item_xml_str_func(Item *a, Item *b): Item_str_func(a,b) {} + Item_xml_str_func(Item *a, Item *b, Item *c): Item_str_func(a,b,c) {} + void fix_length_and_dec(); + String *parse_xml(String *raw_xml, String *parsed_xml_buf); +}; + + +class Item_func_xml_extractvalue: public Item_xml_str_func +{ +public: + Item_func_xml_extractvalue(Item *a,Item *b) :Item_xml_str_func(a,b) {} + const char *func_name() const { return "extractvalue"; } + String *val_str(String *); +}; + + +class Item_func_xml_update: public Item_xml_str_func +{ + String tmp_value2, tmp_value3; +public: + Item_func_xml_update(Item *a,Item *b,Item *c) :Item_xml_str_func(a,b,c) {} + const char *func_name() const { return "updatexml"; } + String *val_str(String *); +}; + diff --git a/sql/lex.h b/sql/lex.h index e3cbebf4629..cf83fc9488c 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -631,6 +631,7 @@ static SYMBOL sql_functions[] = { { "EQUALS", F_SYM(FUNC_ARG2),0,CREATE_FUNC_GEOM(create_func_equals)}, { "EXTERIORRING", F_SYM(FUNC_ARG1),0,CREATE_FUNC_GEOM(create_func_exteriorring)}, { "EXTRACT", SYM(EXTRACT_SYM)}, + { "EXTRACTVALUE", F_SYM(FUNC_ARG2),0,CREATE_FUNC(create_func_xml_extractvalue)}, { "EXP", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_exp)}, { "EXPORT_SET", SYM(EXPORT_SET)}, { "FIELD", SYM(FIELD_FUNC)}, /* For 
compability */ @@ -785,6 +786,7 @@ static SYMBOL sql_functions[] = { { "UNHEX", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_unhex)}, { "UNIQUE_USERS", SYM(UNIQUE_USERS)}, { "UNIX_TIMESTAMP", SYM(UNIX_TIMESTAMP)}, + { "UPDATEXML", F_SYM(FUNC_ARG3),0,CREATE_FUNC(create_func_xml_update)}, { "UPPER", F_SYM(FUNC_ARG1),0,CREATE_FUNC(create_func_ucase)}, { "UUID", F_SYM(FUNC_ARG0),0,CREATE_FUNC(create_func_uuid)}, { "VARIANCE", SYM(VARIANCE_SYM)}, diff --git a/strings/xml.c b/strings/xml.c index 767cb004d34..705da592257 100644 --- a/strings/xml.c +++ b/strings/xml.c @@ -104,7 +104,8 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a) a->end=p->cur; if (a->beg[0] == p->cur[0])p->cur++; a->beg++; - my_xml_norm_text(a); + if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION)) + my_xml_norm_text(a); lex=MY_XML_STRING; } else @@ -148,7 +149,10 @@ static int my_xml_enter(MY_XML_PARSER *st, const char *str, uint len) memcpy(st->attrend,str,len); st->attrend+=len; st->attrend[0]='\0'; - return st->enter ? st->enter(st,st->attr,st->attrend-st->attr) : MY_XML_OK; + if (st->flags & MY_XML_FLAG_RELATIVE_NAMES) + return st->enter ? st->enter(st, str, len) : MY_XML_OK; + else + return st->enter ? st->enter(st,st->attr,st->attrend-st->attr) : MY_XML_OK; } @@ -167,7 +171,7 @@ static int my_xml_leave(MY_XML_PARSER *p, const char *str, uint slen) char s[32]; char g[32]; int rc; - + /* Find previous '.' or beginning */ for( e=p->attrend; (e>p->attr) && (e[0] != '.') ; e--); glen = (uint) ((e[0] == '.') ? (p->attrend-e-1) : p->attrend-e); @@ -180,7 +184,10 @@ static int my_xml_leave(MY_XML_PARSER *p, const char *str, uint slen) return MY_XML_ERROR; } - rc = p->leave_xml ? p->leave_xml(p,p->attr,p->attrend-p->attr) : MY_XML_OK; + if (p->flags & MY_XML_FLAG_RELATIVE_NAMES) + rc= p->leave_xml ? p->leave_xml(p, str, slen) : MY_XML_OK; + else + rc = p->leave_xml ? 
p->leave_xml(p,p->attr,p->attrend-p->attr) : MY_XML_OK; *e='\0'; p->attrend=e; @@ -240,6 +247,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len) if (MY_XML_IDENT == lex) { + p->current_node_type= MY_XML_NODE_TAG; if (MY_XML_OK != my_xml_enter(p,a.beg,(uint) (a.end-a.beg))) return MY_XML_ERROR; } @@ -259,6 +267,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len) lex=my_xml_scan(p,&b); if ( (lex == MY_XML_IDENT) || (lex == MY_XML_STRING) ) { + p->current_node_type= MY_XML_NODE_ATTR; if ((MY_XML_OK != my_xml_enter(p,a.beg,(uint) (a.end-a.beg))) || (MY_XML_OK != my_xml_value(p,b.beg,(uint) (b.end-b.beg))) || (MY_XML_OK != my_xml_leave(p,a.beg,(uint) (a.end-a.beg)))) @@ -273,6 +282,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len) } else if ((MY_XML_STRING == lex) || (MY_XML_IDENT == lex)) { + p->current_node_type= MY_XML_NODE_ATTR; if ((MY_XML_OK != my_xml_enter(p,a.beg,(uint) (a.end-a.beg))) || (MY_XML_OK != my_xml_leave(p,a.beg,(uint) (a.end-a.beg)))) return MY_XML_ERROR; @@ -319,7 +329,8 @@ gt: for ( ; (p->cur < p->end) && (p->cur[0] != '<') ; p->cur++); a.end=p->cur; - my_xml_norm_text(&a); + if (!(p->flags & MY_XML_FLAG_SKIP_TEXT_NORMALIZATION)) + my_xml_norm_text(&a); if (a.beg != a.end) { my_xml_value(p,a.beg,(uint) (a.end-a.beg)); |