summaryrefslogtreecommitdiff
path: root/sql/item_xmlfunc.cc
diff options
context:
space:
mode:
authorbar@mysql.com <>2006-04-11 13:25:02 +0500
committerbar@mysql.com <>2006-04-11 13:25:02 +0500
commitdf2d425afda80c60e3db6015f4d05ea0a3fe4648 (patch)
treeefede149e0da572a4272ed490cb8c7f5de23d47d /sql/item_xmlfunc.cc
parentba5d08f340b951001bdd4c2a92b0344bace538cb (diff)
downloadmariadb-git-df2d425afda80c60e3db6015f4d05ea0a3fe4648.tar.gz
Bug#16233: XML: ExtractValue() fails with special characters
ExtractValue didn't understand tag and attribute names consisting of "tricky" national letters (e.g. Latin accented letters). This happened because the XPath lexer recognized only the basic Latin letters a..z as part of an identifier. Fixed to recognize all letters by means of the new "full ctype", which was added recently.
Diffstat (limited to 'sql/item_xmlfunc.cc')
-rw-r--r--sql/item_xmlfunc.cc64
1 files changed, 26 insertions, 38 deletions
diff --git a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc
index 91f958d5b70..71900c26c2d 100644
--- a/sql/item_xmlfunc.cc
+++ b/sql/item_xmlfunc.cc
@@ -1304,30 +1304,6 @@ my_xpath_init(MY_XPATH *xpath)
}
-/*
- Some ctype-alike helper functions. Note, we cannot
- reuse cs->ident_map[], because in Xpath, unlike in SQL,
- dash character is a valid identifier part.
-*/
-static int
-my_xident_beg(int c)
-{
- return (((c) >= 'a' && (c) <= 'z') ||
- ((c) >= 'A' && (c) <= 'Z') ||
- ((c) == '_'));
-}
-
-
-static int
-my_xident_body(int c)
-{
- return (((c) >= 'a' && (c) <= 'z') ||
- ((c) >= 'A' && (c) <= 'Z') ||
- ((c) >= '0' && (c) <= '9') ||
- ((c)=='-') || ((c) == '_'));
-}
-
-
static int
my_xdigit(int c)
{
@@ -1350,7 +1326,7 @@ static void
my_xpath_lex_scan(MY_XPATH *xpath,
MY_XPATH_LEX *lex, const char *beg, const char *end)
{
- int ch;
+ int ch, ctype, length;
for ( ; beg < end && *beg == ' ' ; beg++); // skip leading spaces
lex->beg= beg;
@@ -1360,20 +1336,20 @@ my_xpath_lex_scan(MY_XPATH *xpath,
lex->term= MY_XPATH_LEX_EOF; // end of line reached
return;
}
- ch= *beg++;
-
- if (ch > 0 && ch < 128 && simpletok[ch])
- {
- // a token consisting of one character found
- lex->end= beg;
- lex->term= ch;
- return;
- }
-
- if (my_xident_beg(ch)) // ident, or a function call, or a keyword
+
+ // Check ident, or a function call, or a keyword
+ if ((length= xpath->cs->cset->ctype(xpath->cs, &ctype,
+ (const uchar*) beg,
+ (const uchar*) end)) > 0 &&
+ ((ctype & (_MY_L | _MY_U)) || *beg == '_'))
{
- // scan until the end of the identifier
- for ( ; beg < end && my_xident_body(*beg); beg++);
+ // scan untill the end of the idenfitier
+ for (beg+= length;
+ (length= xpath->cs->cset->ctype(xpath->cs, &ctype,
+ (const uchar*) beg,
+ (const uchar*) end)) > 0 &&
+ ((ctype & (_MY_L | _MY_U | _MY_NMR)) || *beg == '_' || *beg == '-') ;
+ beg+= length) /* no op */;
lex->end= beg;
// check if a function call
@@ -1388,6 +1364,18 @@ my_xpath_lex_scan(MY_XPATH *xpath,
return;
}
+
+ ch= *beg++;
+
+ if (ch > 0 && ch < 128 && simpletok[ch])
+ {
+ // a token consisting of one character found
+ lex->end= beg;
+ lex->term= ch;
+ return;
+ }
+
+
if (my_xdigit(ch)) // a sequence of digits
{
for ( ; beg < end && my_xdigit(*beg) ; beg++);