summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author unknown <msvensson@neptunus.(none)> 2006-04-11 11:43:51 +0200
committer unknown <msvensson@neptunus.(none)> 2006-04-11 11:43:51 +0200
commit 08dd4932ce3d3ef14342b5b7ffd81eb8be4a66c2 (patch)
tree f56e3883065aaecd3588e2b613352c5a402b6fe1
parent c8f24984dcb843d14f37d5e8946b19dbda1334c7 (diff)
parent 7fc5eaadb18799b74000333da7c6a07ff5017503 (diff)
download mariadb-git-08dd4932ce3d3ef14342b5b7ffd81eb8be4a66c2.tar.gz
Merge bk-internal:/home/bk/mysql-5.1-new
into neptunus.(none):/home/msvensson/mysql/mysql-5.1
-rw-r--r-- mysql-test/r/xml.result 23
-rw-r--r-- mysql-test/t/xml.test 20
-rw-r--r-- sql/item_xmlfunc.cc 64
-rw-r--r-- strings/ctype-simple.c 2
4 files changed, 70 insertions, 39 deletions
diff --git a/mysql-test/r/xml.result b/mysql-test/r/xml.result
index 52f80000015..2946c56da6b 100644
--- a/mysql-test/r/xml.result
+++ b/mysql-test/r/xml.result
@@ -615,3 +615,26 @@ select extractValue('<e>1</e>','last()');
ERROR HY000: XPATH syntax error: ''
select extractValue('<e><a>1</a></e>','/e/');
ERROR HY000: XPATH syntax error: ''
+set names utf8;
+select extractValue('<Ñ><r>r</r></Ñ>','/Ñ/r');
+extractValue('<Ñ><r>r</r></Ñ>','/Ñ/r')
+r
+select extractValue('<r><Ñ>Ñ</Ñ></r>','/r/Ñ');
+extractValue('<r><Ñ>Ñ</Ñ></r>','/r/Ñ')
+Ñ
+select extractValue('<Ñ r="r"/>','/Ñ/@r');
+extractValue('<Ñ r="r"/>','/Ñ/@r')
+r
+select extractValue('<r Ñ="Ñ"/>','/r/@Ñ');
+extractValue('<r Ñ="Ñ"/>','/r/@Ñ')
+Ñ
+DROP PROCEDURE IF EXISTS p2;
+CREATE PROCEDURE p2 ()
+BEGIN
+DECLARE p LONGTEXT CHARACTER SET UTF8 DEFAULT '<Ñ><r>A</r></Ñ>';
+SELECT EXTRACTVALUE(p,'/Ñ/r');
+END//
+CALL p2();
+EXTRACTVALUE(p,'/Ñ/r')
+A
+DROP PROCEDURE p2;
diff --git a/mysql-test/t/xml.test b/mysql-test/t/xml.test
index af3ec2d827e..4bc76287fe2 100644
--- a/mysql-test/t/xml.test
+++ b/mysql-test/t/xml.test
@@ -295,3 +295,23 @@ select extractValue('<e>1</e>','last()');
--error 1105
select extractValue('<e><a>1</a></e>','/e/');
+#
+# Bug#16233: XML: ExtractValue() fails with special characters
+#
+set names utf8;
+select extractValue('<Ñ><r>r</r></Ñ>','/Ñ/r');
+select extractValue('<r><Ñ>Ñ</Ñ></r>','/r/Ñ');
+select extractValue('<Ñ r="r"/>','/Ñ/@r');
+select extractValue('<r Ñ="Ñ"/>','/r/@Ñ');
+--disable_warnings
+DROP PROCEDURE IF EXISTS p2;
+--enable_warnings
+DELIMITER //;
+CREATE PROCEDURE p2 ()
+BEGIN
+ DECLARE p LONGTEXT CHARACTER SET UTF8 DEFAULT '<Ñ><r>A</r></Ñ>';
+ SELECT EXTRACTVALUE(p,'/Ñ/r');
+END//
+DELIMITER ;//
+CALL p2();
+DROP PROCEDURE p2;
diff --git a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc
index 91f958d5b70..71900c26c2d 100644
--- a/sql/item_xmlfunc.cc
+++ b/sql/item_xmlfunc.cc
@@ -1304,30 +1304,6 @@ my_xpath_init(MY_XPATH *xpath)
}
-/*
- Some ctype-alike helper functions. Note, we cannot
- reuse cs->ident_map[], because in Xpath, unlike in SQL,
- dash character is a valid identifier part.
-*/
-static int
-my_xident_beg(int c)
-{
- return (((c) >= 'a' && (c) <= 'z') ||
- ((c) >= 'A' && (c) <= 'Z') ||
- ((c) == '_'));
-}
-
-
-static int
-my_xident_body(int c)
-{
- return (((c) >= 'a' && (c) <= 'z') ||
- ((c) >= 'A' && (c) <= 'Z') ||
- ((c) >= '0' && (c) <= '9') ||
- ((c)=='-') || ((c) == '_'));
-}
-
-
static int
my_xdigit(int c)
{
@@ -1350,7 +1326,7 @@ static void
my_xpath_lex_scan(MY_XPATH *xpath,
MY_XPATH_LEX *lex, const char *beg, const char *end)
{
- int ch;
+ int ch, ctype, length;
for ( ; beg < end && *beg == ' ' ; beg++); // skip leading spaces
lex->beg= beg;
@@ -1360,20 +1336,20 @@ my_xpath_lex_scan(MY_XPATH *xpath,
lex->term= MY_XPATH_LEX_EOF; // end of line reached
return;
}
- ch= *beg++;
-
- if (ch > 0 && ch < 128 && simpletok[ch])
- {
- // a token consisting of one character found
- lex->end= beg;
- lex->term= ch;
- return;
- }
-
- if (my_xident_beg(ch)) // ident, or a function call, or a keyword
+
+ // Check ident, or a function call, or a keyword
+ if ((length= xpath->cs->cset->ctype(xpath->cs, &ctype,
+ (const uchar*) beg,
+ (const uchar*) end)) > 0 &&
+ ((ctype & (_MY_L | _MY_U)) || *beg == '_'))
{
- // scan until the end of the identifier
- for ( ; beg < end && my_xident_body(*beg); beg++);
+ // scan until the end of the identifier
+ for (beg+= length;
+ (length= xpath->cs->cset->ctype(xpath->cs, &ctype,
+ (const uchar*) beg,
+ (const uchar*) end)) > 0 &&
+ ((ctype & (_MY_L | _MY_U | _MY_NMR)) || *beg == '_' || *beg == '-') ;
+ beg+= length) /* no op */;
lex->end= beg;
// check if a function call
@@ -1388,6 +1364,18 @@ my_xpath_lex_scan(MY_XPATH *xpath,
return;
}
+
+ ch= *beg++;
+
+ if (ch > 0 && ch < 128 && simpletok[ch])
+ {
+ // a token consisting of one character found
+ lex->end= beg;
+ lex->term= ch;
+ return;
+ }
+
+
if (my_xdigit(ch)) // a sequence of digits
{
for ( ; beg < end && my_xdigit(*beg) ; beg++);
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index 7dd3dfca29a..a9fd5b8852e 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -1362,7 +1362,7 @@ int my_mb_ctype_8bit(CHARSET_INFO *cs, int *ctype,
*ctype= 0;
return MY_CS_TOOSMALL;
}
- *ctype= cs->ctype[*s];
+ *ctype= cs->ctype[*s + 1];
return 1;
}