summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mysql.com>2008-12-10 13:05:57 +0400
committerAlexander Barkov <bar@mysql.com>2008-12-10 13:05:57 +0400
commit8d4b41605b0bbc731dafc7bd8a9f850eaf06f552 (patch)
treefaad7ef64c45f044551b4dd6bfa03f73bb1cdf05
parente2fef3d10a2f592e42b23563ac9369e5ce40a747 (diff)
downloadmariadb-git-8d4b41605b0bbc731dafc7bd8a9f850eaf06f552.tar.gz
Bug#38227 EXTRACTVALUE doesn't work with DTD declarations
Problem: The XML syntax parser allowed quoted strings to be used as attribute names and tried to push them onto the parser state stack in place of identifiers. The parser then failed if a quoted string contained slash characters. Fix: - Disallow quoted strings in regular tags. - Allow quoted strings in a DOCTYPE declaration, but do not push them onto the parser state stack (just skip them).
-rw-r--r--mysql-test/r/xml.result24
-rw-r--r--mysql-test/t/xml.test25
-rw-r--r--strings/xml.c13
3 files changed, 60 insertions, 2 deletions
diff --git a/mysql-test/r/xml.result b/mysql-test/r/xml.result
index 56c884343e3..41c0d6bee21 100644
--- a/mysql-test/r/xml.result
+++ b/mysql-test/r/xml.result
@@ -1029,4 +1029,28 @@ SELECT 1 FROM t1 ORDER BY(UPDATEXML(a, '1', '1'));
1
1
DROP TABLE t1;
+SET @xml=
+'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+ <head>
+ <title> Title - document with document declaration</title>
+ </head>
+ <body> Hi, Im a webpage with document a declaration </body>
+</html>';
+SELECT ExtractValue(@xml, 'html/head/title');
+ExtractValue(@xml, 'html/head/title')
+ Title - document with document declaration
+SELECT ExtractValue(@xml, 'html/body');
+ExtractValue(@xml, 'html/body')
+ Hi, Im a webpage with document a declaration
+SELECT ExtractValue('<xml "xxx" "yyy">CharData</xml>', '/xml');
+ExtractValue('<xml "xxx" "yyy">CharData</xml>', '/xml')
+NULL
+Warnings:
+Warning 1525 Incorrect XML value: 'parse error at line 1 pos 11: STRING unexpected ('>' wanted)'
+SELECT ExtractValue('<xml xxx "yyy">CharData</xml>', '/xml');
+ExtractValue('<xml xxx "yyy">CharData</xml>', '/xml')
+NULL
+Warnings:
+Warning 1525 Incorrect XML value: 'parse error at line 1 pos 17: STRING unexpected ('>' wanted)'
End of 5.1 tests
diff --git a/mysql-test/t/xml.test b/mysql-test/t/xml.test
index 5ca9c7afd76..d840e14ba5f 100644
--- a/mysql-test/t/xml.test
+++ b/mysql-test/t/xml.test
@@ -551,4 +551,29 @@ INSERT INTO t1 VALUES (0), (0);
SELECT 1 FROM t1 ORDER BY(UPDATEXML(a, '1', '1'));
DROP TABLE t1;
+#
+# BUG#38227 EXTRACTVALUE doesn't work with DTD declarations
+#
+# Check that quoted strings work fine in DOCTYPE declaration.
+#
+
+SET @xml=
+'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+ <head>
+ <title> Title - document with document declaration</title>
+ </head>
+ <body> Hi, Im a webpage with document a declaration </body>
+</html>';
+
+SELECT ExtractValue(@xml, 'html/head/title');
+SELECT ExtractValue(@xml, 'html/body');
+
+# These two documents will fail.
+# Quoted strings are not allowed in regular tags
+#
+SELECT ExtractValue('<xml "xxx" "yyy">CharData</xml>', '/xml');
+SELECT ExtractValue('<xml xxx "yyy">CharData</xml>', '/xml');
+
+
--echo End of 5.1 tests
diff --git a/strings/xml.c b/strings/xml.c
index 5c62a8e8603..1b697ec6b26 100644
--- a/strings/xml.c
+++ b/strings/xml.c
@@ -328,7 +328,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, size_t len)
}
while ((MY_XML_IDENT == (lex=my_xml_scan(p,&a))) ||
- (MY_XML_STRING == lex))
+ ((MY_XML_STRING == lex && exclam)))
{
MY_XML_ATTR b;
if (MY_XML_EQ == (lex=my_xml_scan(p,&b)))
@@ -349,13 +349,22 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, size_t len)
return MY_XML_ERROR;
}
}
- else if ((MY_XML_STRING == lex) || (MY_XML_IDENT == lex))
+ else if (MY_XML_IDENT == lex)
{
p->current_node_type= MY_XML_NODE_ATTR;
if ((MY_XML_OK != my_xml_enter(p,a.beg,(size_t) (a.end-a.beg))) ||
(MY_XML_OK != my_xml_leave(p,a.beg,(size_t) (a.end-a.beg))))
return MY_XML_ERROR;
}
+ else if ((MY_XML_STRING == lex) && exclam)
+ {
+ /*
+ We are in <!DOCTYPE>, e.g.
+ <!DOCTYPE name SYSTEM "SystemLiteral">
+ <!DOCTYPE name PUBLIC "PubidLiteral" "SystemLiteral">
+ Just skip "SystemLiteral" and "PubidLiteral"
+ */
+ }
else
break;
}