diff options
author | unknown <bar@mysql.com/bar.intranet.mysql.r18.ru> | 2006-09-14 11:47:19 +0500 |
---|---|---|
committer | unknown <bar@mysql.com/bar.intranet.mysql.r18.ru> | 2006-09-14 11:47:19 +0500 |
commit | 32ede45dd416e3ddbcf26ed0704cd86886f5bbd4 (patch) | |
tree | dc4747cdb58f41bf99ee390562be5094d0e63af3 | |
parent | 9e89ea6fa8734c2538a416b29fc3120447f6b733 (diff) | |
download | mariadb-git-32ede45dd416e3ddbcf26ed0704cd86886f5bbd4.tar.gz |
Bug#20854 XML functions: wrong result in ExtractValue
mysql-test/r/xml.result:
- Adding test case
- Fixing error message
mysql-test/t/xml.test:
Adding test case
sql/item_xmlfunc.cc:
For grammar rules with loops like:
AdditiveExpr ::= MultiplicativeExpr ('+' MultiplicativeExpr)*
If we scanned '+' and then met an error when parsing
MultiplicativeExpr, then we should fully stop parsing - without
trying to apply any other rules.
Fix: add an "error" member to the MY_XPATH structure,
and make my_xpath_parse_term() never return success
once the error is set.
strings/xml.c:
Adding my_xml_ctype map for flags, indicating
whether a character is a space character, is a
valid identifier start character, is a valid
identifier body character. Using this map to
properly scan identifiers. Also, using this map
to scan spaces faster (instead of strchr).
-rw-r--r-- | mysql-test/r/xml.result | 28 | ||||
-rw-r--r-- | mysql-test/t/xml.test | 16 | ||||
-rw-r--r-- | sql/item_xmlfunc.cc | 64 | ||||
-rw-r--r-- | strings/xml.c | 60 |
4 files changed, 152 insertions, 16 deletions
diff --git a/mysql-test/r/xml.result b/mysql-test/r/xml.result index 780be01d7ce..efe7d14095d 100644 --- a/mysql-test/r/xml.result +++ b/mysql-test/r/xml.result @@ -570,7 +570,7 @@ select extractvalue('<a>a<b>B</b></a>','a|/b'); extractvalue('<a>a<b>B</b></a>','a|/b') a select extractvalue('<a>A</a>','/<a>'); -ERROR HY000: XPATH syntax error: '<a>' +ERROR HY000: XPATH error: comparison of two nodesets is not supported: '<a>' select extractvalue('<a><b>b</b><b!>b!</b!></a>','//b!'); ERROR HY000: XPATH syntax error: '!' select extractvalue('<a>A<b>B<c>C</c></b></a>','/a/descendant::*'); @@ -710,3 +710,29 @@ Data select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something'); extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something') Otherdata +select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','/zot/tim0/02'); +ERROR HY000: XPATH syntax error: '02' +select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*'); +extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*') +NULL +Warnings: +Warning 1512 Incorrect XML value: 'parse error at line 1 pos 13: unknown token unexpected (ident or '/' wanted)' +select extractValue('<.>test</.>','//*'); +extractValue('<.>test</.>','//*') +NULL +Warnings: +Warning 1512 Incorrect XML value: 'parse error at line 1 pos 2: unknown token unexpected (ident or '/' wanted)' +select extractValue('<->test</->','//*'); +extractValue('<->test</->','//*') +NULL +Warnings: +Warning 1512 Incorrect XML value: 'parse error at line 1 pos 2: unknown token unexpected (ident or '/' wanted)' +select extractValue('<:>test</:>','//*'); +extractValue('<:>test</:>','//*') +test +select extractValue('<_>test</_>','//*'); +extractValue('<_>test</_>','//*') +test +select extractValue('<x.-_:>test</x.-_:>','//*'); +extractValue('<x.-_:>test</x.-_:>','//*') +test diff --git a/mysql-test/t/xml.test 
b/mysql-test/t/xml.test index d510a61f04d..3347573b4b7 100644 --- a/mysql-test/t/xml.test +++ b/mysql-test/t/xml.test @@ -360,3 +360,19 @@ select extractValue('<ns:element xmlns:ns="myns">a</ns:element>','/ns:element/@x # select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/foo.bar'); select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something'); + +# +# Bug#20854 XML functions: wrong result in ExtractValue +# +--error 1105 +select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','/zot/tim0/02'); +select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*'); +# dot and dash are bad identtifier start character +select extractValue('<.>test</.>','//*'); +select extractValue('<->test</->','//*'); +# semicolon is good identifier start character +select extractValue('<:>test</:>','//*'); +# underscore is good identifier start character +select extractValue('<_>test</_>','//*'); +# dot, dash, underscore and semicolon are good identifier middle characters +select extractValue('<x.-_:>test</x.-_:>','//*'); diff --git a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc index dfa2d2a7325..44a2b690bac 100644 --- a/sql/item_xmlfunc.cc +++ b/sql/item_xmlfunc.cc @@ -105,6 +105,7 @@ typedef struct my_xpath_st String *context_cache; /* last context provider */ String *pxml; /* Parsed XML, an array of MY_XML_NODE */ CHARSET_INFO *cs; /* character set/collation string comparison */ + int error; } MY_XPATH; @@ -913,7 +914,9 @@ static Item *eq_func_reverse(int oper, Item *a, Item *b) RETURN The newly created item. 
*/ -static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b) +static Item *create_comparator(MY_XPATH *xpath, + int oper, MY_XPATH_LEX *context, + Item *a, Item *b) { if (a->type() != Item::XPATH_NODESET && b->type() != Item::XPATH_NODESET) @@ -923,6 +926,13 @@ static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b) else if (a->type() == Item::XPATH_NODESET && b->type() == Item::XPATH_NODESET) { + uint len= context->end - context->beg; + set_if_bigger(len, 32); + my_printf_error(ER_UNKNOWN_ERROR, + "XPATH error: " + "comparison of two nodesets is not supported: '%.*s'", + MYF(0), len, context->beg); + return 0; // TODO: Comparison of two nodesets } else @@ -1430,7 +1440,7 @@ my_xpath_lex_scan(MY_XPATH *xpath, static int my_xpath_parse_term(MY_XPATH *xpath, int term) { - if (xpath->lasttok.term == term) + if (xpath->lasttok.term == term && !xpath->error) { xpath->prevtok= xpath->lasttok; my_xpath_lex_scan(xpath, &xpath->lasttok, @@ -1558,8 +1568,9 @@ static int my_xpath_parse_AbsoluteLocationPath(MY_XPATH *xpath) return my_xpath_parse_RelativeLocationPath(xpath); } - return my_xpath_parse_term(xpath, MY_XPATH_LEX_EOF) || - my_xpath_parse_RelativeLocationPath(xpath); + my_xpath_parse_RelativeLocationPath(xpath); + + return (xpath->error == 0); } @@ -1596,7 +1607,10 @@ static int my_xpath_parse_RelativeLocationPath(MY_XPATH *xpath) "*", 1, xpath->pxml, 1); if (!my_xpath_parse_Step(xpath)) + { + xpath->error= 1; return 0; + } } return 1; } @@ -1633,10 +1647,16 @@ my_xpath_parse_AxisSpecifier_NodeTest_opt_Predicate_list(MY_XPATH *xpath) xpath->context_cache= context_cache; if(!my_xpath_parse_PredicateExpr(xpath)) + { + xpath->error= 1; return 0; + } if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RB)) + { + xpath->error= 1; return 0; + } xpath->item= nodeset2bool(xpath, xpath->item); @@ -1893,7 +1913,10 @@ static int my_xpath_parse_UnionExpr(MY_XPATH *xpath) if (!my_xpath_parse_PathExpr(xpath) || xpath->item->type() != 
Item::XPATH_NODESET) + { + xpath->error= 1; return 0; + } xpath->item= new Item_nodeset_func_union(prev, xpath->item, xpath->pxml); } return 1; @@ -1929,6 +1952,7 @@ static int my_xpath_parse_PathExpr(MY_XPATH *xpath) { return my_xpath_parse_LocationPath(xpath) || my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(xpath); + } @@ -1975,7 +1999,10 @@ static int my_xpath_parse_OrExpr(MY_XPATH *xpath) { Item *prev= xpath->item; if (!my_xpath_parse_AndExpr(xpath)) + { return 0; + xpath->error= 1; + } xpath->item= new Item_cond_or(nodeset2bool(xpath, prev), nodeset2bool(xpath, xpath->item)); } @@ -2003,7 +2030,10 @@ static int my_xpath_parse_AndExpr(MY_XPATH *xpath) { Item *prev= xpath->item; if (!my_xpath_parse_EqualityExpr(xpath)) + { + xpath->error= 1; return 0; + } xpath->item= new Item_cond_and(nodeset2bool(xpath,prev), nodeset2bool(xpath,xpath->item)); @@ -2057,17 +2087,26 @@ static int my_xpath_parse_EqualityOperator(MY_XPATH *xpath) } static int my_xpath_parse_EqualityExpr(MY_XPATH *xpath) { + MY_XPATH_LEX operator_context; if (!my_xpath_parse_RelationalExpr(xpath)) return 0; + + operator_context= xpath->lasttok; while (my_xpath_parse_EqualityOperator(xpath)) { Item *prev= xpath->item; int oper= xpath->extra; if (!my_xpath_parse_RelationalExpr(xpath)) + { + xpath->error= 1; return 0; + } - if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item))) + if (!(xpath->item= create_comparator(xpath, oper, &operator_context, + prev, xpath->item))) return 0; + + operator_context= xpath->lasttok; } return 1; } @@ -2109,18 +2148,25 @@ static int my_xpath_parse_RelationalOperator(MY_XPATH *xpath) } static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath) { + MY_XPATH_LEX operator_context; if (!my_xpath_parse_AdditiveExpr(xpath)) return 0; + operator_context= xpath->lasttok; while (my_xpath_parse_RelationalOperator(xpath)) { Item *prev= xpath->item; int oper= xpath->extra; if (!my_xpath_parse_AdditiveExpr(xpath)) + { + xpath->error= 1; return 0; + } - if 
(!(xpath->item= create_comparator(xpath, oper, prev, xpath->item))) + if (!(xpath->item= create_comparator(xpath, oper, &operator_context, + prev, xpath->item))) return 0; + operator_context= xpath->lasttok; } return 1; } @@ -2153,7 +2199,10 @@ static int my_xpath_parse_AdditiveExpr(MY_XPATH *xpath) int oper= xpath->prevtok.term; Item *prev= xpath->item; if (!my_xpath_parse_MultiplicativeExpr(xpath)) + { + xpath->error= 1; return 0; + } if (oper == MY_XPATH_LEX_PLUS) xpath->item= new Item_func_plus(prev, xpath->item); @@ -2198,7 +2247,10 @@ static int my_xpath_parse_MultiplicativeExpr(MY_XPATH *xpath) int oper= xpath->prevtok.term; Item *prev= xpath->item; if (!my_xpath_parse_UnaryExpr(xpath)) + { + xpath->error= 1; return 0; + } switch (oper) { case MY_XPATH_LEX_ASTERISK: diff --git a/strings/xml.c b/strings/xml.c index 51649dcb343..7f7c531d051 100644 --- a/strings/xml.c +++ b/strings/xml.c @@ -19,6 +19,7 @@ #include "my_xml.h" +#define MY_XML_UNKNOWN 'U' #define MY_XML_EOF 'E' #define MY_XML_STRING 'S' #define MY_XML_IDENT 'I' @@ -39,6 +40,46 @@ typedef struct xml_attr_st } MY_XML_ATTR; +/* + XML ctype: +*/ +#define MY_XML_ID0 0x01 /* Identifier initial character */ +#define MY_XML_ID1 0x02 /* Identifier medial character */ +#define MY_XML_SPC 0x08 /* Spacing character */ + + +/* + http://www.w3.org/TR/REC-xml/ + [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | + CombiningChar | Extender + [5] Name ::= (Letter | '_' | ':') (NameChar)* +*/ + +static char my_xml_ctype[256]= +{ +/*00*/ 0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0, +/*10*/ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +/*20*/ 8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0, /* !"#$%&'()*+,-./ */ +/*30*/ 2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0, /* 0123456789:;<=>? 
*/ +/*40*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* @ABCDEFGHIJKLMNO */ +/*50*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3, /* PQRSTUVWXYZ[\]^_ */ +/*60*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* `abcdefghijklmno */ +/*70*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, /* pqrstuvwxyz{|}~ */ +/*80*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/*90*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/*A0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/*B0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/*C0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/*D0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/*E0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/*F0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 +}; + +#define my_xml_is_space(c) (my_xml_ctype[(uchar) (c)] & MY_XML_SPC) +#define my_xml_is_id0(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID0) +#define my_xml_is_id1(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID1) + + static const char *lex2str(int lex) { switch(lex) @@ -56,13 +97,13 @@ static const char *lex2str(int lex) case MY_XML_QUESTION: return "'?'"; case MY_XML_EXCLAM: return "'!'"; } - return "UNKNOWN"; + return "unknown token"; } static void my_xml_norm_text(MY_XML_ATTR *a) { - for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->beg[0]) ; a->beg++ ); - for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->end[-1]) ; a->end-- ); + for ( ; (a->beg < a->end) && my_xml_is_space(a->beg[0]) ; a->beg++ ); + for ( ; (a->beg < a->end) && my_xml_is_space(a->end[-1]) ; a->end-- ); } @@ -70,7 +111,7 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a) { int lex; - for( ; ( p->cur < p->end) && strchr(" \t\r\n",p->cur[0]) ; p->cur++); + for( ; ( p->cur < p->end) && my_xml_is_space(p->cur[0]) ; p->cur++); if (p->cur >= p->end) { @@ -124,16 +165,17 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a) my_xml_norm_text(a); lex=MY_XML_STRING; } - else + else if (my_xml_is_id0(p->cur[0])) { - for(; - (p->cur < p->end) && !strchr("?'\"=/<> \t\r\n", p->cur[0]); - p->cur++) - {} + p->cur++; + while (p->cur < p->end && my_xml_is_id1(p->cur[0])) + p->cur++; a->end=p->cur; my_xml_norm_text(a); 
lex=MY_XML_IDENT; } + else + lex= MY_XML_UNKNOWN; #if 0 printf("LEX=%s[%d]\n",lex2str(lex),a->end-a->beg); |