summaryrefslogtreecommitdiff
path: root/sql/item_xmlfunc.cc
diff options
context:
space:
mode:
author: bar@mysql.com/bar.myoffice.izhnet.ru <> 2007-10-09 13:53:39 +0500
committer: bar@mysql.com/bar.myoffice.izhnet.ru <> 2007-10-09 13:53:39 +0500
commit: 015c00ccb801044c3091fcf1821e87e8cf94e11c (patch)
tree: 5e88ef872d6be0540ddc207add0053dfb0aedadf /sql/item_xmlfunc.cc
parent: 2c12ddc1d4cd9b0a665150041b7296b220d968fe (diff)
download: mariadb-git-015c00ccb801044c3091fcf1821e87e8cf94e11c.tar.gz
Bug#27287 extractvalue() (and updatexml()) extremely slow for large XML
Performance improvements made. ExtractValue for large XML values is now much faster (about 2000 times faster on 1Mb-long XML values).
Diffstat (limited to 'sql/item_xmlfunc.cc')
-rw-r--r--sql/item_xmlfunc.cc60
1 files changed, 24 insertions, 36 deletions
diff --git a/sql/item_xmlfunc.cc b/sql/item_xmlfunc.cc
index 15be9c97b6e..d0dbab16b1c 100644
--- a/sql/item_xmlfunc.cc
+++ b/sql/item_xmlfunc.cc
@@ -2611,35 +2611,27 @@ typedef struct
uint level;
String *pxml; // parsed XML
uint pos[MAX_LEVEL]; // Tag position stack
+ uint parent; // Offset of the parent of the current node
} MY_XML_USER_DATA;
-/*
- Find the parent node
-
- SYNOPSYS
- Find the parent node, i.e. a tag or attrubute node on the given level.
-
- RETURN
- 1 - success
- 0 - failure
-*/
-static uint xml_parent_tag(MY_XML_NODE *items, uint nitems, uint level)
+static bool
+append_node(String *str, MY_XML_NODE *node)
{
- if (!nitems)
- return 0;
-
- MY_XML_NODE *p, *last= &items[nitems-1];
- for (p= last; p >= items; p--)
- {
- if (p->level == level &&
- (p->type == MY_XML_NODE_TAG ||
- p->type == MY_XML_NODE_ATTR))
- {
- return p - items;
- }
- }
- return 0;
+ /*
+ If "str" doesn't have space for a new node,
+ it will allocate two times more space than it has had so far.
+ (2*len+512) is a heuristic value,
+ which gave the best performance during tests.
+ The ideas behind this formula are:
+ - It keeps the number of reallocs very small:
+ about 10 reallocs on a 1Mb-long XML value.
+ - At the same time, it avoids excessive memory use.
+ */
+ if (str->reserve(sizeof(MY_XML_NODE), 2 * str->length() + 512))
+ return TRUE;
+ str->q_append((const char*) node, sizeof(MY_XML_NODE));
+ return FALSE;
}
@@ -2661,19 +2653,17 @@ extern "C" int xml_enter(MY_XML_PARSER *st,const char *attr, size_t len);
int xml_enter(MY_XML_PARSER *st,const char *attr, size_t len)
{
MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data;
- MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr();
uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE);
- uint parent= xml_parent_tag(nodes, numnodes, data->level - 1);
MY_XML_NODE node;
+ node.parent= data->parent; // Set parent for the new node to old parent
+ data->parent= numnodes; // Remember current node as new parent
data->pos[data->level]= numnodes;
node.level= data->level++;
node.type= st->current_node_type; // TAG or ATTR
node.beg= attr;
node.end= attr + len;
- node.parent= parent;
- data->pxml->append((const char*) &node, sizeof(MY_XML_NODE));
- return MY_XML_OK;
+ return append_node(data->pxml, &node) ? MY_XML_ERROR : MY_XML_OK;
}
@@ -2694,18 +2684,14 @@ extern "C" int xml_value(MY_XML_PARSER *st,const char *attr, size_t len);
int xml_value(MY_XML_PARSER *st,const char *attr, size_t len)
{
MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data;
- MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr();
- uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE);
- uint parent= xml_parent_tag(nodes, numnodes, data->level - 1);
MY_XML_NODE node;
+ node.parent= data->parent; // Set parent for the new text node to old parent
node.level= data->level;
node.type= MY_XML_NODE_TEXT;
node.beg= attr;
node.end= attr + len;
- node.parent= parent;
- data->pxml->append((const char*) &node, sizeof(MY_XML_NODE));
- return MY_XML_OK;
+ return append_node(data->pxml, &node) ? MY_XML_ERROR : MY_XML_OK;
}
@@ -2730,6 +2716,7 @@ int xml_leave(MY_XML_PARSER *st,const char *attr, size_t len)
data->level--;
MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr();
+ data->parent= nodes[data->parent].parent;
nodes+= data->pos[data->level];
nodes->tagend= st->cur;
@@ -2760,6 +2747,7 @@ String *Item_xml_str_func::parse_xml(String *raw_xml, String *parsed_xml_buf)
p.flags= MY_XML_FLAG_RELATIVE_NAMES | MY_XML_FLAG_SKIP_TEXT_NORMALIZATION;
user_data.level= 0;
user_data.pxml= parsed_xml_buf;
+ user_data.parent= 0;
my_xml_set_enter_handler(&p, xml_enter);
my_xml_set_value_handler(&p, xml_value);
my_xml_set_leave_handler(&p, xml_leave);