summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 10
-rw-r--r-- debugXML.c 6
-rw-r--r-- include/libxml/xpath.h 4
-rw-r--r-- parserInternals.c 2
-rw-r--r-- win32/libxml2/libxml2.def.src 2
-rw-r--r-- xpath.c 209
-rw-r--r-- xpath.h 4
7 files changed, 216 insertions, 21 deletions
diff --git a/ChangeLog b/ChangeLog
index 21a14f7d..13d2db48 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Fri Apr 27 19:06:13 CEST 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
+
+ * parserInternals.c xpath.[ch]: some UTF8 cleanup on
+ xmlXPathParseName
+ * xpath.c: Igor Zlatkovic suggested a change for NAN and MSC
+ * debugXML.c: avoid compilation problems if compiling without
+ HTML support, Igor Zlatkovic
+ * win32/libxml2/libxml2.def.src: being able to compile without
+ XPath on Windows
+
Thu Apr 26 22:53:03 CEST 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
* libxml.m4: yet another patch from Toshio Kuratomi
diff --git a/debugXML.c b/debugXML.c
index f7b10d5a..386be444 100644
--- a/debugXML.c
+++ b/debugXML.c
@@ -1723,7 +1723,9 @@ xmlShell(xmlDocPtr doc, char *filename, xmlShellReadlineFunc input,
"%s is an XSLT value tree\n", arg);
break;
}
+#ifdef LIBXML_XPATH_ENABLED
xmlXPathFreeNodeSetList(list);
+#endif
} else {
xmlGenericError(xmlGenericErrorContext,
"%s: no such node\n", arg);
@@ -1787,7 +1789,9 @@ xmlShell(xmlDocPtr doc, char *filename, xmlShellReadlineFunc input,
"%s is an XSLT value tree\n", arg);
break;
}
+#ifdef LIBXML_XPATH_ENABLED
xmlXPathFreeNodeSetList(list);
+#endif
} else {
xmlGenericError(xmlGenericErrorContext,
"%s: no such node\n", arg);
@@ -1855,7 +1859,9 @@ xmlShell(xmlDocPtr doc, char *filename, xmlShellReadlineFunc input,
"%s is an XSLT value tree\n", arg);
break;
}
+#ifdef LIBXML_XPATH_ENABLED
xmlXPathFreeNodeSetList(list);
+#endif
} else {
xmlGenericError(xmlGenericErrorContext,
"%s: no such node\n", arg);
diff --git a/include/libxml/xpath.h b/include/libxml/xpath.h
index c9b846c4..a7223b58 100644
--- a/include/libxml/xpath.h
+++ b/include/libxml/xpath.h
@@ -48,7 +48,9 @@ typedef enum {
XPTR_SYNTAX_ERROR,
XPTR_RESOURCE_ERROR,
XPTR_SUB_RESOURCE_ERROR,
- XPATH_UNDEF_PREFIX_ERROR
+ XPATH_UNDEF_PREFIX_ERROR,
+ XPATH_ENCODING_ERROR,
+ XPATH_INVALID_CHAR_ERROR
} xmlXPathError;
/*
diff --git a/parserInternals.c b/parserInternals.c
index bf41a28e..75de22d6 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -1380,7 +1380,7 @@ encoding_error:
int
xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
- if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
+ if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
/*
* We are supposed to handle UTF8, check it's valid
* From rfc2044: encoding of the Unicode values on UTF-8:
diff --git a/win32/libxml2/libxml2.def.src b/win32/libxml2/libxml2.def.src
index 3223a8cf..39692930 100644
--- a/win32/libxml2/libxml2.def.src
+++ b/win32/libxml2/libxml2.def.src
@@ -716,7 +716,9 @@ EXPORTS
xmlRegisterOutputCallbacks
xmlSaveFileTo
xmlNodeDumpOutput
+#ifdef LIBXML_HTML_ENABLED
htmlDocContentDumpOutput
+#endif /* LIBXML_HTML_ENABLED */
/*
* xmlmemory.h
diff --git a/xpath.c b/xpath.c
index 57cdc0ae..ba069c65 100644
--- a/xpath.c
+++ b/xpath.c
@@ -157,11 +157,18 @@ xmlXPathInit(void) {
if (initialized) return;
+#ifdef XPATH_USE_DIVISION_SHORTCUTS
+ xmlXPathNAN = 0;
+ xmlXPathNAN /= 0.0;
+ xmlXPathPINF = 1;
+ xmlXPathPINF /= 0.0;
+ xmlXPathNINF = -1;
+ xmlXPathNINF /= 0.0;
+#else
xmlXPathNAN = 0.0 / 0.0;
-
xmlXPathPINF = 1 / 0.0;
-
xmlXPathNINF = -1 / 0.0;
+#endif
initialized = 1;
}
@@ -884,6 +891,13 @@ PUSH_AND_POP(xmlXPathObjectPtr, value)
#define SKIP(val) ctxt->cur += (val)
#define NXT(val) ctxt->cur[(val)]
#define CUR_PTR ctxt->cur
+/* Decode the character at the parsing cursor; its byte length goes to l. */
+#define CUR_CHAR(l) xmlXPathCurrentChar(ctxt, &(l))
+
+/*
+ * Append character v, whose encoded length is l, to buffer b at index i.
+ * i is advanced by one for a single byte, otherwise by the value returned
+ * by xmlCopyChar(). Wrapped in do/while(0) so the expansion is a single
+ * statement and cannot capture a following "else".
+ */
+#define COPY_BUF(l,b,i,v) \
+    do { \
+        if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
+        else (i) += xmlCopyChar((l), &(b)[(i)], (v)); \
+    } while (0)
+
+/* Advance the parsing cursor by l bytes. */
+#define NEXTL(l) ctxt->cur += (l)
#define SKIP_BLANKS \
while (IS_BLANK(*(ctxt->cur))) NEXT
@@ -1014,7 +1028,9 @@ const char *xmlXPathErrorMessages[] = {
"Syntax error",
"Resource error",
"Sub resource error",
- "Undefined namespace prefix"
+ "Undefined namespace prefix",
+ "Encoding error",
+ "Char out of XML range"
};
/**
@@ -5041,6 +5057,92 @@ static void xmlXPathCompRelativeLocationPath(xmlXPathParserContextPtr ctxt);
#endif
/**
+ * xmlXPathCurrentChar:
+ * @ctxt: the XPath parser context
+ * @len: pointer to the length of the char read
+ *
+ * The current char value at ctxt->cur; when the input is UTF-8 it may
+ * actually span multiple bytes in the input buffer.
+ *
+ * Returns the current char value and stores its length in bytes in @len.
+ * Returns 0 with @len set to 0 when @ctxt is NULL or when the bytes at the
+ * cursor are not a well-formed UTF-8 sequence (XPATH_ENCODING_ERROR is
+ * raised in that case). A decoded value rejected by IS_CHAR raises
+ * XPATH_INVALID_CHAR_ERROR.
+ */
+
+static int
+xmlXPathCurrentChar(xmlXPathParserContextPtr ctxt, int *len) {
+    unsigned char c;
+    unsigned int val;
+    const xmlChar *cur;
+
+    if (ctxt == NULL) {
+        /* never hand an indeterminate length back to the caller */
+        *len = 0;
+        return(0);
+    }
+    cur = ctxt->cur;
+
+    /*
+     * We are supposed to handle UTF8, check it's valid
+     * From rfc2044: encoding of the Unicode values on UTF-8:
+     *
+     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
+     * 0000 0000-0000 007F   0xxxxxxx
+     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
+     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
+     *
+     * Check for the 0x110000 limit too
+     */
+    c = *cur;
+    if (c & 0x80) {
+        /*
+         * A lead byte is 11xxxxxx; 10xxxxxx is a continuation byte and
+         * cannot start a sequence.
+         */
+        if ((c & 0x40) == 0)
+            goto encoding_error;
+        if ((cur[1] & 0xc0) != 0x80)
+            goto encoding_error;
+        if ((c & 0xe0) == 0xe0) {
+
+            if ((cur[2] & 0xc0) != 0x80)
+                goto encoding_error;
+            if ((c & 0xf0) == 0xf0) {
+                if (((c & 0xf8) != 0xf0) ||
+                    ((cur[3] & 0xc0) != 0x80))
+                    goto encoding_error;
+                /* 4-byte code */
+                *len = 4;
+                val = (cur[0] & 0x7) << 18;
+                val |= (cur[1] & 0x3f) << 12;
+                val |= (cur[2] & 0x3f) << 6;
+                val |= cur[3] & 0x3f;
+            } else {
+                /* 3-byte code */
+                *len = 3;
+                val = (cur[0] & 0xf) << 12;
+                val |= (cur[1] & 0x3f) << 6;
+                val |= cur[2] & 0x3f;
+            }
+        } else {
+            /* 2-byte code */
+            *len = 2;
+            val = (cur[0] & 0x1f) << 6;
+            val |= cur[1] & 0x3f;
+        }
+        if (!IS_CHAR(val)) {
+            XP_ERROR0(XPATH_INVALID_CHAR_ERROR);
+        }
+        return(val);
+    } else {
+        /* 1-byte code */
+        *len = 1;
+        return((int) *cur);
+    }
+encoding_error:
+    /*
+     * If we detect an UTF8 error that probably means that the input
+     * encoding was not properly advertised. No recovery is attempted:
+     * XP_ERROR0 (defined outside this hunk) records the error and returns
+     * 0 from this function, so the length must be set before invoking it.
+     */
+    *len = 0;
+    XP_ERROR0(XPATH_ENCODING_ERROR);
+}
+
+/**
* xmlXPathParseNCName:
* @ctxt: the XPath Parser context
*
@@ -5105,6 +5207,7 @@ xmlXPathParseQName(xmlXPathParserContextPtr ctxt, xmlChar **prefix) {
return(ret);
}
+static xmlChar * xmlXPathParseNameComplex(xmlXPathParserContextPtr ctxt);
/**
* xmlXPathParseName:
* @ctxt: the XPath Parser context
@@ -5121,25 +5224,95 @@ xmlXPathParseQName(xmlXPathParserContextPtr ctxt, xmlChar **prefix) {
xmlChar *
xmlXPathParseName(xmlXPathParserContextPtr ctxt) {
- const xmlChar *q;
- xmlChar *ret = NULL;
+ const xmlChar *in;
+ xmlChar *ret;
+ int count = 0;
- if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
- q = NEXT;
+ /*
+ * Accelerator for simple ASCII names
+ *
+ * Scan one leading byte in [A-Za-z_:] followed by any run of bytes in
+ * [A-Za-z0-9_:]. The shortcut is taken only when that run stops on
+ * ' ', '>' or '/': the scanned bytes are then duplicated with
+ * xmlStrndup(), ctxt->cur is moved past them and the copy is returned.
+ * Every other input is handed, with ctxt->cur left untouched, to
+ * xmlXPathParseNameComplex().
+ */
+ in = ctxt->cur;
+ if (((*in >= 0x61) && (*in <= 0x7A)) ||
+ ((*in >= 0x41) && (*in <= 0x5A)) ||
+ (*in == '_') || (*in == ':')) {
+ in++;
+ while (((*in >= 0x61) && (*in <= 0x7A)) ||
+ ((*in >= 0x41) && (*in <= 0x5A)) ||
+ ((*in >= 0x30) && (*in <= 0x39)) ||
+ (*in == '_') || (*in == ':'))
+ in++;
+ if ((*in == ' ') || (*in == '>') || (*in == '/')) {
+ count = in - ctxt->cur;
+ ret = xmlStrndup(ctxt->cur, count);
+ ctxt->cur = in;
+ return(ret);
+ }
+ }
+ return(xmlXPathParseNameComplex(ctxt));
+}
- /* TODO Make this UTF8 compliant !!! */
- while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
- (CUR == '.') || (CUR == '-') ||
- (CUR == '_') || (CUR == ':') ||
- (IS_COMBINING(CUR)) ||
- (IS_EXTENDER(CUR)))
- NEXT;
-
- ret = xmlStrndup(q, CUR_PTR - q);
+/*
+ * xmlXPathParseNameComplex:
+ * @ctxt: the XPath Parser context
+ *
+ * Slow path of xmlXPathParseName(): reads the name one decoded character
+ * at a time through CUR_CHAR, so multi-byte characters are handled.
+ * Names shorter than XML_MAX_NAMELEN are collected in a stack buffer;
+ * longer ones switch to a heap buffer that doubles in size as needed.
+ *
+ * Returns a newly allocated copy of the name, or NULL if the input does
+ * not start with a letter, '_' or ':' or if memory allocation fails.
+ */
+static xmlChar *
+xmlXPathParseNameComplex(xmlXPathParserContextPtr ctxt) {
+    xmlChar buf[XML_MAX_NAMELEN + 5];
+    int len = 0, l;
+    int c;
- return(ret);
-}
+    /*
+     * Handler for more complex cases
+     */
+    c = CUR_CHAR(l);
+    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
+        (!IS_LETTER(c) && (c != '_') &&
+         (c != ':'))) {
+        return(NULL);
+    }
+
+    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
+           ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
+            (c == '.') || (c == '-') ||
+            (c == '_') || (c == ':') ||
+            (IS_COMBINING(c)) ||
+            (IS_EXTENDER(c)))) {
+        COPY_BUF(l,buf,len,c);
+        NEXTL(l);
+        c = CUR_CHAR(l);
+        if (len >= XML_MAX_NAMELEN) {
+            /*
+             * Okay someone managed to make a huge name, so he's ready to pay
+             * for the processing speed.
+             */
+            xmlChar *buffer;
+            int max = len * 2;
+
+            buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
+            if (buffer == NULL) {
+                XP_ERROR0(XPATH_MEMORY_ERROR);
+            }
+            memcpy(buffer, buf, len);
+            while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
+                   (c == '.') || (c == '-') ||
+                   (c == '_') || (c == ':') ||
+                   (IS_COMBINING(c)) ||
+                   (IS_EXTENDER(c))) {
+                /* keep room for one more character plus the terminator */
+                if (len + 10 > max) {
+                    xmlChar *tmp;
+
+                    max *= 2;
+                    tmp = (xmlChar *) xmlRealloc(buffer,
+                                                 max * sizeof(xmlChar));
+                    if (tmp == NULL) {
+                        /* the old block is still owned here on failure */
+                        xmlFree(buffer);
+                        XP_ERROR0(XPATH_MEMORY_ERROR);
+                    }
+                    buffer = tmp;
+                }
+                COPY_BUF(l,buffer,len,c);
+                NEXTL(l);
+                c = CUR_CHAR(l);
+            }
+            buffer[len] = 0;
+            return(buffer);
+        }
+    }
+    return(xmlStrndup(buf, len));
+}
/**
* xmlXPathStringEvalNumber:
* @str: A string to scan
diff --git a/xpath.h b/xpath.h
index c9b846c4..a7223b58 100644
--- a/xpath.h
+++ b/xpath.h
@@ -48,7 +48,9 @@ typedef enum {
XPTR_SYNTAX_ERROR,
XPTR_RESOURCE_ERROR,
XPTR_SUB_RESOURCE_ERROR,
- XPATH_UNDEF_PREFIX_ERROR
+ XPATH_UNDEF_PREFIX_ERROR,
+ XPATH_ENCODING_ERROR,
+ XPATH_INVALID_CHAR_ERROR
} xmlXPathError;
/*