diff options
author | Daniel Veillard <veillard@src.gnome.org> | 1999-10-14 09:10:25 +0000 |
---|---|---|
committer | Daniel Veillard <veillard@src.gnome.org> | 1999-10-14 09:10:25 +0000 |
commit | 7c1206fc060fd4260f7c6f003c32fbbea3e542fd (patch) | |
tree | 355843d626573307ace64a03b5e3aa34971b3f2a /testHTML.c | |
parent | 7d2c276a65a077932bbe2ae5a1b3af304175f995 (diff) | |
download | libxml2-7c1206fc060fd4260f7c6f003c32fbbea3e542fd.tar.gz |
Revamped HTML parsing, lots of bug fixes for HTML stuff,
Added xmlValidGetValidElements and xmlValidGetPotentialChildren,
Completed and cleaned up the tests,
Added doc for new modules gnome-xml-xmlmemory.html and gnome-xml-nanohttp.html,
Daniel
Diffstat (limited to 'testHTML.c')
-rw-r--r-- | testHTML.c | 577 |
1 files changed, 570 insertions, 7 deletions
@@ -15,6 +15,8 @@ #include <stdio.h> #include <string.h> +#include <stdarg.h> + #ifdef HAVE_SYS_TYPES_H #include <sys/types.h> @@ -32,12 +34,16 @@ #include <stdlib.h> #endif +#include "xmlmemory.h" #include "HTMLparser.h" #include "HTMLtree.h" #include "debugXML.h" static int debug = 0; static int copy = 0; +static int sax = 0; /* incremented by --sax: files are parsed through parseSAXFile */ +static int repeat = 0; /* incremented by --repeat: 100 parses per occurrence */ +static int noout = 0; /* incremented by --noout: result printing is skipped */ /* * Note: this is perfectly clean HTML, i.e. not a useful test. @@ -59,12 +65,544 @@ We are doing our best to get it back on-line,\n\ "; */ +xmlSAXHandler emptySAXHandlerStruct = { + NULL, /* internalSubset */ + NULL, /* isStandalone */ + NULL, /* hasInternalSubset */ + NULL, /* hasExternalSubset */ + NULL, /* resolveEntity */ + NULL, /* getEntity */ + NULL, /* entityDecl */ + NULL, /* notationDecl */ + NULL, /* attributeDecl */ + NULL, /* elementDecl */ + NULL, /* unparsedEntityDecl */ + NULL, /* setDocumentLocator */ + NULL, /* startDocument */ + NULL, /* endDocument */ + NULL, /* startElement */ + NULL, /* endElement */ + NULL, /* reference */ + NULL, /* characters */ + NULL, /* ignorableWhitespace */ + NULL, /* processingInstruction */ + NULL, /* comment */ + NULL, /* xmlParserWarning */ + NULL, /* xmlParserError */ + NULL, /* fatalError (same position as fatalErrorDebug in debugSAXHandlerStruct) */ + NULL, /* getParameterEntity */ +}; + +xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct; +extern xmlSAXHandlerPtr debugSAXHandler; + +/************************************************************************ + * * + * Debug Handlers * + * * + ************************************************************************/ + +/** + * isStandaloneDebug: + * @ctxt: An XML parser context + * + * Is this document tagged standalone ? 
+ * + * Returns 1 if true + */ +int +isStandaloneDebug(void *ctx) +{ + fprintf(stdout, "SAX.isStandalone()\n"); + return(0); +} + +/** + * hasInternalSubsetDebug: + * @ctx: An XML parser context (unused here) + * + * Debug handler: prints SAX.hasInternalSubset() on stdout + * + * Returns 0 always + */ +int +hasInternalSubsetDebug(void *ctx) +{ + fprintf(stdout, "SAX.hasInternalSubset()\n"); + return(0); +} + +/** + * hasExternalSubsetDebug: + * @ctx: An XML parser context (unused here) + * + * Debug handler: prints SAX.hasExternalSubset() on stdout + * + * Returns 0 always + */ +int +hasExternalSubsetDebug(void *ctx) +{ + fprintf(stdout, "SAX.hasExternalSubset()\n"); + return(0); +} + +/** + * internalSubsetDebug: + * @ctx: An XML parser context (unused here) + * + * Prints name, ExternalID and SystemID on stdout; no DTD is loaded + */ +void +internalSubsetDebug(void *ctx, const xmlChar *name, + const xmlChar *ExternalID, const xmlChar *SystemID) +{ + /* xmlDtdPtr externalSubset; */ + + fprintf(stdout, "SAX.internalSubset(%s, %s, %s)\n", + name, ExternalID, SystemID); + +/*********** + if ((ExternalID != NULL) || (SystemID != NULL)) { + externalSubset = xmlParseDTD(ExternalID, SystemID); + if (externalSubset != NULL) { + xmlFreeDtd(externalSubset); + } + } + ***********/ +} + +/** + * resolveEntityDebug: + * @ctxt: An XML parser context + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * + * Special entity resolver, better left to the parser, it has + * more context than the application layer. + * The default behaviour is to NOT resolve the entities, in that case + * the ENTITY_REF nodes are built in the structure (and the parameter + * values). + * + * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour. 
+ */ +xmlParserInputPtr +resolveEntityDebug(void *ctx, const xmlChar *publicId, const xmlChar *systemId) +{ + /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */ + + + fprintf(stdout, "SAX.resolveEntity("); + if (publicId != NULL) + fprintf(stdout, "%s", (char *)publicId); + else + fprintf(stdout, " "); + if (systemId != NULL) + fprintf(stdout, ", %s)\n", (char *)systemId); + else + fprintf(stdout, ", )\n"); +/********* + if (systemId != NULL) { + return(xmlNewInputFromFile(ctxt, (char *) systemId)); + } + *********/ + return(NULL); +} + +/** + * getEntityDebug: + * @ctx: An XML parser context (unused here) + * @name: The entity name + * + * Debug handler: prints the requested entity name on stdout + * + * Returns NULL always, no entity is looked up by this handler. + */ +xmlEntityPtr +getEntityDebug(void *ctx, const xmlChar *name) +{ + fprintf(stdout, "SAX.getEntity(%s)\n", name); + return(NULL); +} + +/** + * getParameterEntityDebug: + * @ctx: An XML parser context (unused here) + * @name: The entity name + * + * Debug handler: prints the requested parameter entity name on stdout + * + * Returns NULL always + */ +xmlEntityPtr +getParameterEntityDebug(void *ctx, const xmlChar *name) +{ + fprintf(stdout, "SAX.getParameterEntity(%s)\n", name); + return(NULL); +} + + +/** + * entityDeclDebug: + * @ctxt: An XML parser context + * @name: the entity name + * @type: the entity type + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * @content: the entity value (without processing). 
+ * + * An entity definition has been parsed + */ +void +entityDeclDebug(void *ctx, const xmlChar *name, int type, + const xmlChar *publicId, const xmlChar *systemId, xmlChar *content) +{ + fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n", + name, type, publicId, systemId, content); +} + +/** + * attributeDeclDebug: + * @ctx: An XML parser context (unused here) + * @elem, @name: the element name and the attribute name (both printed) + * @type, @def, @defaultValue: printed as received; @tree is not printed + * + * An attribute definition has been parsed + */ +void +attributeDeclDebug(void *ctx, const xmlChar *elem, const xmlChar *name, + int type, int def, const xmlChar *defaultValue, + xmlEnumerationPtr tree) +{ + fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n", + elem, name, type, def, defaultValue); +} + +/** + * elementDeclDebug: + * @ctx: An XML parser context (unused here) + * @name: the element name + * @type: the element type + * @content: the element content (ignored by this handler) + * + * An element definition has been parsed; only @name and @type are printed + */ +void +elementDeclDebug(void *ctx, const xmlChar *name, int type, + xmlElementContentPtr content) +{ + fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n", + name, type); +} + +/** + * notationDeclDebug: + * @ctxt: An XML parser context + * @name: The name of the notation + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * + * What to do when a notation declaration has been parsed. 
+ */ +void +notationDeclDebug(void *ctx, const xmlChar *name, + const xmlChar *publicId, const xmlChar *systemId) +{ + fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n", + (char *) name, (char *) publicId, (char *) systemId); +} + +/** + * unparsedEntityDeclDebug: + * @ctx: An XML parser context (unused here) + * @name: The name of the entity + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * @notationName: the name of the notation + * + * Prints the four strings it receives when an unparsed entity declaration is parsed + */ +void +unparsedEntityDeclDebug(void *ctx, const xmlChar *name, + const xmlChar *publicId, const xmlChar *systemId, + const xmlChar *notationName) +{ + fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n", + (char *) name, (char *) publicId, (char *) systemId, + (char *) notationName); +} + +/** + * setDocumentLocatorDebug: + * @ctx: An XML parser context (unused here) + * @loc: A SAX Locator (ignored, only the call itself is logged) + * + * Receive the document locator at startup, actually xmlDefaultSAXLocator + * Everything is available on the context, so this is useless in our case. + */ +void +setDocumentLocatorDebug(void *ctx, xmlSAXLocatorPtr loc) +{ + fprintf(stdout, "SAX.setDocumentLocator()\n"); +} + +/** + * startDocumentDebug: + * @ctx: An XML parser context (unused here) + * + * called when the document starts being processed. + */ +void +startDocumentDebug(void *ctx) +{ + fprintf(stdout, "SAX.startDocument()\n"); +} + +/** + * endDocumentDebug: + * @ctx: An XML parser context (unused here) + * + * called when the document end has been detected. + */ +void +endDocumentDebug(void *ctx) +{ + fprintf(stdout, "SAX.endDocument()\n"); +} + +/** + * startElementDebug: + * @ctxt: An XML parser context + * @name: The element name + * + * called when an opening tag has been processed. 
+ */ +void +startElementDebug(void *ctx, const xmlChar *name, const xmlChar **atts) +{ + int i; + + fprintf(stdout, "SAX.startElement(%s", (char *) name); + if (atts != NULL) { + for (i = 0;(atts[i] != NULL);i++) { /* atts is walked as name, value pairs until a NULL name */ + fprintf(stdout, ", %s='", atts[i++]); + fprintf(stdout, "%s'", atts[i]); /* NOTE(review): assumes the value is non-NULL -- confirm for attributes without a value */ + } + } + fprintf(stdout, ")\n"); +} + +/** + * endElementDebug: + * @ctx: An XML parser context (unused here) + * @name: The element name + * + * called when the end of an element has been detected. + */ +void +endElementDebug(void *ctx, const xmlChar *name) +{ + fprintf(stdout, "SAX.endElement(%s)\n", (char *) name); +} + +/** + * charactersDebug: + * @ctx: An XML parser context (unused here) + * @ch: a xmlChar string + * @len: the number of xmlChar + * + * receiving some chars from the parser; at most the first 30 of the + * @len chars are printed one by one, followed by the value of @len. + */ +void +charactersDebug(void *ctx, const xmlChar *ch, int len) +{ + int i; + + fprintf(stdout, "SAX.characters("); + for (i = 0;(i < len) && (i < 30);i++) + fprintf(stdout, "%c", ch[i]); + fprintf(stdout, ", %d)\n", len); +} + +/** + * referenceDebug: + * @ctx: An XML parser context (unused here) + * @name: The entity name + * + * called when an entity reference is detected. + */ +void +referenceDebug(void *ctx, const xmlChar *name) +{ + fprintf(stdout, "SAX.reference(%s)\n", name); +} + +/** + * ignorableWhitespaceDebug: + * @ctx: An XML parser context (unused here) + * @ch: a xmlChar string + * @len: the number of xmlChar + * NOTE(review): %.30s ignores @len, so this assumes ch is NUL-terminated or readable for 30 bytes -- confirm + * + * receiving some ignorable whitespaces from the parser. + * Prints up to 30 chars starting at @ch, then the value of @len. + */ +void +ignorableWhitespaceDebug(void *ctx, const xmlChar *ch, int len) +{ + fprintf(stdout, "SAX.ignorableWhitespace(%.30s, %d)\n", + (char *) ch, len); +} + +/** + * processingInstructionDebug: + * @ctxt: An XML parser context + * @target: the target name + * @data: the PI data's + * @len: the number of xmlChar + * + * A processing instruction has been parsed. 
+ */ +void +processingInstructionDebug(void *ctx, const xmlChar *target, + const xmlChar *data) +{ + fprintf(stdout, "SAX.processingInstruction(%s, %s)\n", + (char *) target, (char *) data); +} + +/** + * commentDebug: + * @ctx: An XML parser context (unused here) + * @value: the comment content + * + * A comment has been parsed. + */ +void +commentDebug(void *ctx, const xmlChar *value) +{ + fprintf(stdout, "SAX.comment(%s)\n", value); +} + +/** + * warningDebug: + * @ctx: An XML parser context (unused here) + * @msg: printf-style format string of the message + * @...: extra parameters for the message display + * + * Prints SAX.warning: followed by the formatted message on stdout; + * no file, line or position information is added by this handler. + */ +void +warningDebug(void *ctx, const char *msg, ...) +{ + va_list args; + + va_start(args, msg); + fprintf(stdout, "SAX.warning: "); + vfprintf(stdout, msg, args); + va_end(args); +} + +/** + * errorDebug: + * @ctx: An XML parser context (unused here) + * @msg: printf-style format string of the message + * @...: extra parameters for the message display + * + * Prints SAX.error: followed by the formatted message on stdout; + * no file, line or position information is added by this handler. + */ +void +errorDebug(void *ctx, const char *msg, ...) +{ + va_list args; + + va_start(args, msg); + fprintf(stdout, "SAX.error: "); + vfprintf(stdout, msg, args); + va_end(args); +} + +/** + * fatalErrorDebug: + * @ctx: An XML parser context (unused here) + * @msg: printf-style format string of the message + * @...: extra parameters for the message display + * + * Prints SAX.fatalError: followed by the formatted message on stdout; + * no file, line or position information is added by this handler. + */ +void +fatalErrorDebug(void *ctx, const char *msg, ...) 
+{ + va_list args; + + va_start(args, msg); + fprintf(stdout, "SAX.fatalError: "); + vfprintf(stdout, msg, args); + va_end(args); +} + +xmlSAXHandler debugSAXHandlerStruct = { /* entries follow the slot order commented in emptySAXHandlerStruct */ + internalSubsetDebug, + isStandaloneDebug, + hasInternalSubsetDebug, + hasExternalSubsetDebug, + resolveEntityDebug, + getEntityDebug, + entityDeclDebug, + notationDeclDebug, + attributeDeclDebug, + elementDeclDebug, + unparsedEntityDeclDebug, + setDocumentLocatorDebug, + startDocumentDebug, + endDocumentDebug, + startElementDebug, + endElementDebug, + referenceDebug, + charactersDebug, + ignorableWhitespaceDebug, + processingInstructionDebug, + commentDebug, + warningDebug, + errorDebug, + fatalErrorDebug, + getParameterEntityDebug, +}; + +xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct; /************************************************************************ * * * Debug * * * ************************************************************************/ +void parseSAXFile(char *filename) { + htmlDocPtr doc; + /* + * First pass: parse with the all-NULL handler table; a returned document is reported and freed + */ + doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL); + if (doc != NULL) { + fprintf(stdout, "htmlSAXParseFile returned non-NULL\n"); + xmlFreeDoc(doc); + } + + if (!noout) { + /* + * Second pass (skipped when noout is set): the debug handlers print each callback on stdout + */ + doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL); + if (doc != NULL) { + fprintf(stdout, "htmlSAXParseFile returned non-NULL\n"); + xmlFreeDoc(doc); + } + } +} + void parseAndPrintFile(char *filename) { htmlDocPtr doc, tmp; @@ -85,10 +623,12 @@ void parseAndPrintFile(char *filename) { /* * print it. */ - if (!debug) - htmlDocDump(stdout, doc); - else - xmlDebugDumpDocument(stdout, doc); + if (!noout) { + if (!debug) + htmlDocDump(stdout, doc); + else + xmlDebugDumpDocument(stdout, doc); + } /* * free it. 
@@ -128,7 +668,7 @@ void parseAndPrintBuffer(xmlChar *buf) { } int main(int argc, char **argv) { - int i; + int i, count; int files = 0; for (i = 1; i < argc ; i++) { @@ -136,20 +676,43 @@ int main(int argc, char **argv) { debug++; else if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy"))) copy++; + else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax"))) + sax++; + else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout"))) + noout++; + else if ((!strcmp(argv[i], "-repeat")) || + (!strcmp(argv[i], "--repeat"))) + repeat++; } for (i = 1; i < argc ; i++) { if (argv[i][0] != '-') { - parseAndPrintFile(argv[i]); + if (repeat) { + for (count = 0;count < 100 * repeat;count++) { /* 100 passes for each --repeat flag given */ + if (sax) + parseSAXFile(argv[i]); + else + parseAndPrintFile(argv[i]); + } + } else { + if (sax) + parseSAXFile(argv[i]); + else + parseAndPrintFile(argv[i]); + } files ++; } } if (files == 0) { - printf("Usage : %s [--debug] [--copy] HTMLfiles ...\n", + printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n", /* NOTE(review): --copy is listed twice in this synopsis while --sax, --repeat and --noout are missing */ argv[0]); printf("\tParse the HTML files and output the result of the parsing\n"); printf("\t--debug : dump a debug tree of the in-memory document\n"); printf("\t--copy : used to test the internal copy implementation\n"); + printf("\t--sax : debug the sequence of SAX callbacks\n"); + printf("\t--repeat : parse the file 100 times, for timing or profiling\n"); + printf("\t--noout : do not print the result\n"); } + xmlMemoryDump(); /* presumably reports allocations still outstanding (declared in xmlmemory.h) -- TODO confirm */ return(0); } |