diff options
Diffstat (limited to 'bundle/libxml/xmlregexp.c')
-rw-r--r-- | bundle/libxml/xmlregexp.c | 4319 |
1 files changed, 0 insertions, 4319 deletions
diff --git a/bundle/libxml/xmlregexp.c b/bundle/libxml/xmlregexp.c deleted file mode 100644 index 129c1269ec..0000000000 --- a/bundle/libxml/xmlregexp.c +++ /dev/null @@ -1,4319 +0,0 @@ -/* - * regexp.c: generic and extensible Regular Expression engine - * - * Basically designed with the purpose of compiling regexps for - * the variety of validation/shemas mechanisms now available in - * XML related specifications thise includes: - * - XML-1.0 DTD validation - * - XML Schemas structure part 1 - * - XML Schemas Datatypes part 2 especially Appendix F - * - RELAX-NG/TREX i.e. the counter proposal - * - * See Copyright for the status of this software. - * - * Daniel Veillard <veillard@redhat.com> - */ - -#define IN_LIBXML -#include "libxml.h" - -#ifdef LIBXML_REGEXP_ENABLED - -#include <stdio.h> -#include <string.h> -#include <libxml/tree.h> -#include <libxml/parserInternals.h> -#include <libxml/xmlregexp.h> -#include <libxml/xmlautomata.h> -#include <libxml/xmlunicode.h> - -/* #define DEBUG_REGEXP_GRAPH */ -/* #define DEBUG_REGEXP_EXEC */ -/* #define DEBUG_PUSH */ -/* #define DEBUG_COMPACTION */ - -#define ERROR(str) ctxt->error = 1; \ - xmlGenericError(xmlGenericErrorContext, "Regexp: %s: %s\n", str, ctxt->cur) -#define NEXT ctxt->cur++ -#define CUR (*(ctxt->cur)) -#define NXT(index) (ctxt->cur[index]) - -#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l) -#define NEXTL(l) ctxt->cur += l; - -/** - * TODO: - * - * macro to flag unimplemented blocks - */ -#define TODO \ - xmlGenericError(xmlGenericErrorContext, \ - "Unimplemented block at %s:%d\n", \ - __FILE__, __LINE__); - - -/************************************************************************ - * * - * Datatypes and structures * - * * - ************************************************************************/ - -typedef enum { - XML_REGEXP_EPSILON = 1, - XML_REGEXP_CHARVAL, - XML_REGEXP_RANGES, - XML_REGEXP_SUBREG, - XML_REGEXP_STRING, - XML_REGEXP_ANYCHAR, /* . */ - XML_REGEXP_ANYSPACE, /* \s */ - XML_REGEXP_NOTSPACE, /* \S */ - XML_REGEXP_INITNAME, /* \l */ - XML_REGEXP_NOTINITNAME, /* \l */ - XML_REGEXP_NAMECHAR, /* \c */ - XML_REGEXP_NOTNAMECHAR, /* \C */ - XML_REGEXP_DECIMAL, /* \d */ - XML_REGEXP_NOTDECIMAL, /* \d */ - XML_REGEXP_REALCHAR, /* \w */ - XML_REGEXP_NOTREALCHAR, /* \w */ - XML_REGEXP_LETTER, - XML_REGEXP_LETTER_UPPERCASE, - XML_REGEXP_LETTER_LOWERCASE, - XML_REGEXP_LETTER_TITLECASE, - XML_REGEXP_LETTER_MODIFIER, - XML_REGEXP_LETTER_OTHERS, - XML_REGEXP_MARK, - XML_REGEXP_MARK_NONSPACING, - XML_REGEXP_MARK_SPACECOMBINING, - XML_REGEXP_MARK_ENCLOSING, - XML_REGEXP_NUMBER, - XML_REGEXP_NUMBER_DECIMAL, - XML_REGEXP_NUMBER_LETTER, - XML_REGEXP_NUMBER_OTHERS, - XML_REGEXP_PUNCT, - XML_REGEXP_PUNCT_CONNECTOR, - XML_REGEXP_PUNCT_DASH, - XML_REGEXP_PUNCT_OPEN, - XML_REGEXP_PUNCT_CLOSE, - XML_REGEXP_PUNCT_INITQUOTE, - XML_REGEXP_PUNCT_FINQUOTE, - XML_REGEXP_PUNCT_OTHERS, - XML_REGEXP_SEPAR, - XML_REGEXP_SEPAR_SPACE, - XML_REGEXP_SEPAR_LINE, - XML_REGEXP_SEPAR_PARA, - XML_REGEXP_SYMBOL, - XML_REGEXP_SYMBOL_MATH, - XML_REGEXP_SYMBOL_CURRENCY, - XML_REGEXP_SYMBOL_MODIFIER, - XML_REGEXP_SYMBOL_OTHERS, - XML_REGEXP_OTHER, - XML_REGEXP_OTHER_CONTROL, - XML_REGEXP_OTHER_FORMAT, - XML_REGEXP_OTHER_PRIVATE, - XML_REGEXP_OTHER_NA, - XML_REGEXP_BLOCK_NAME -} xmlRegAtomType; - -typedef enum { - XML_REGEXP_QUANT_EPSILON = 1, - XML_REGEXP_QUANT_ONCE, - XML_REGEXP_QUANT_OPT, - XML_REGEXP_QUANT_MULT, - XML_REGEXP_QUANT_PLUS, - XML_REGEXP_QUANT_ONCEONLY, - XML_REGEXP_QUANT_ALL, - XML_REGEXP_QUANT_RANGE -} xmlRegQuantType; - -typedef enum { - XML_REGEXP_START_STATE = 1, - XML_REGEXP_FINAL_STATE, - XML_REGEXP_TRANS_STATE -} xmlRegStateType; - -typedef enum { - XML_REGEXP_MARK_NORMAL = 0, - XML_REGEXP_MARK_START, - XML_REGEXP_MARK_VISITED -} xmlRegMarkedType; - -typedef struct _xmlRegRange xmlRegRange; -typedef xmlRegRange *xmlRegRangePtr; - -struct _xmlRegRange { - int neg; - xmlRegAtomType type; - int start; - int end; - xmlChar *blockName; -}; - -typedef struct _xmlRegAtom xmlRegAtom; -typedef xmlRegAtom *xmlRegAtomPtr; - -typedef struct _xmlAutomataState xmlRegState; -typedef xmlRegState *xmlRegStatePtr; - -struct _xmlRegAtom { - int no; - xmlRegAtomType type; - xmlRegQuantType quant; - int min; - int max; - - void *valuep; - void *valuep2; - int neg; - int codepoint; - xmlRegStatePtr start; - xmlRegStatePtr stop; - int maxRanges; - int nbRanges; - xmlRegRangePtr *ranges; - void *data; -}; - -typedef struct _xmlRegCounter xmlRegCounter; -typedef xmlRegCounter *xmlRegCounterPtr; - -struct _xmlRegCounter { - int min; - int max; -}; - -typedef struct _xmlRegTrans xmlRegTrans; -typedef xmlRegTrans *xmlRegTransPtr; - -struct _xmlRegTrans { - xmlRegAtomPtr atom; - int to; - int counter; - int count; -}; - -struct _xmlAutomataState { - xmlRegStateType type; - xmlRegMarkedType mark; - xmlRegMarkedType reached; - int no; - - int maxTrans; - int nbTrans; - xmlRegTrans *trans; -}; - -typedef struct _xmlAutomata xmlRegParserCtxt; -typedef xmlRegParserCtxt *xmlRegParserCtxtPtr; - -struct _xmlAutomata { - xmlChar *string; - xmlChar *cur; - - int error; - int neg; - - xmlRegStatePtr start; - xmlRegStatePtr end; - xmlRegStatePtr state; - - xmlRegAtomPtr atom; - - int maxAtoms; - int nbAtoms; - xmlRegAtomPtr *atoms; - - int maxStates; - int nbStates; - xmlRegStatePtr *states; - - int maxCounters; - int nbCounters; - xmlRegCounter *counters; - - int determinist; -}; - -struct _xmlRegexp { - xmlChar *string; - int nbStates; - xmlRegStatePtr *states; - int nbAtoms; - xmlRegAtomPtr *atoms; - int nbCounters; - xmlRegCounter *counters; - int determinist; - /* - * That's the compact form for determinists automatas - */ - int nbstates; - int *compact; - void **transdata; - int nbstrings; - xmlChar **stringMap; -}; - -typedef struct _xmlRegExecRollback xmlRegExecRollback; -typedef xmlRegExecRollback *xmlRegExecRollbackPtr; - -struct _xmlRegExecRollback { - xmlRegStatePtr state;/* the current state */ - int index; /* the index in the input stack */ - int nextbranch; /* the next transition to explore in that state */ - int *counts; /* save the automate state if it has some */ -}; - -typedef struct _xmlRegInputToken xmlRegInputToken; -typedef xmlRegInputToken *xmlRegInputTokenPtr; - -struct _xmlRegInputToken { - xmlChar *value; - void *data; -}; - -struct _xmlRegExecCtxt { - int status; /* execution status != 0 indicate an error */ - int determinist; /* did we found an inderterministic behaviour */ - xmlRegexpPtr comp; /* the compiled regexp */ - xmlRegExecCallbacks callback; - void *data; - - xmlRegStatePtr state;/* the current state */ - int transno; /* the current transition on that state */ - int transcount; /* the number of char in char counted transitions */ - - /* - * A stack of rollback states - */ - int maxRollbacks; - int nbRollbacks; - xmlRegExecRollback *rollbacks; - - /* - * The state of the automata if any - */ - int *counts; - - /* - * The input stack - */ - int inputStackMax; - int inputStackNr; - int index; - int *charStack; - const xmlChar *inputString; /* when operating on characters */ - xmlRegInputTokenPtr inputStack;/* when operating on strings */ - -}; - -#define REGEXP_ALL_COUNTER 0x123456 -#define REGEXP_ALL_LAX_COUNTER 0x123457 - -static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top); -static void xmlRegFreeState(xmlRegStatePtr state); -static void xmlRegFreeAtom(xmlRegAtomPtr atom); - -/************************************************************************ - * * - * Allocation/Deallocation * - * * - ************************************************************************/ - -static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt); -/** - * xmlRegEpxFromParse: - * @ctxt: the parser context used to build it - * - * Allocate a new regexp and fill it with the reult from the parser - * - * Returns the new regexp or NULL in case of error - */ -static xmlRegexpPtr -xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) { - xmlRegexpPtr ret; - - ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp)); - if (ret == NULL) - return(NULL); - memset(ret, 0, sizeof(xmlRegexp)); - ret->string = ctxt->string; - ctxt->string = NULL; - ret->nbStates = ctxt->nbStates; - ctxt->nbStates = 0; - ret->states = ctxt->states; - ctxt->states = NULL; - ret->nbAtoms = ctxt->nbAtoms; - ctxt->nbAtoms = 0; - ret->atoms = ctxt->atoms; - ctxt->atoms = NULL; - ret->nbCounters = ctxt->nbCounters; - ctxt->nbCounters = 0; - ret->counters = ctxt->counters; - ctxt->counters = NULL; - ret->determinist = ctxt->determinist; - - if ((ret->determinist != 0) && - (ret->nbCounters == 0) && - (ret->atoms != NULL) && - (ret->atoms[0] != NULL) && - (ret->atoms[0]->type == XML_REGEXP_STRING)) { - int i, j, nbstates = 0, nbatoms = 0; - int *stateRemap; - int *stringRemap; - int *transitions; - void **transdata; - xmlChar **stringMap; - xmlChar *value; - - /* - * Switch to a compact representation - * 1/ counting the effective number of states left - * 2/ conting the unique number of atoms, and check that - * they are all of the string type - * 3/ build a table state x atom for the transitions - */ - - stateRemap = xmlMalloc(ret->nbStates * sizeof(int)); - for (i = 0;i < ret->nbStates;i++) { - if (ret->states[i] != NULL) { - stateRemap[i] = nbstates; - nbstates++; - } else { - stateRemap[i] = -1; - } - } -#ifdef DEBUG_COMPACTION - printf("Final: %d states\n", nbstates); -#endif - stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *)); - stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int)); - for (i = 0;i < ret->nbAtoms;i++) { - if ((ret->atoms[i]->type == XML_REGEXP_STRING) && - (ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) { - value = ret->atoms[i]->valuep; - for (j = 0;j < nbatoms;j++) { - if (xmlStrEqual(stringMap[j], value)) { - stringRemap[i] = j; - break; - } - } - if (j >= nbatoms) { - stringRemap[i] = nbatoms; - stringMap[nbatoms] = xmlStrdup(value); - nbatoms++; - } - } else { - xmlFree(stateRemap); - xmlFree(stringRemap); - for (i = 0;i < nbatoms;i++) - xmlFree(stringMap[i]); - xmlFree(stringMap); - goto fail_compact; - } - } -#ifdef DEBUG_COMPACTION - printf("Final: %d atoms\n", nbatoms); -#endif - - /* - * Allocate the transition table. The first entry for each - * state correspond to the state type. - */ - transitions = (int *) xmlMalloc(nbstates * (nbatoms + 1) * sizeof(int)); - transdata = NULL; - memset(transitions, 0, nbstates * (nbatoms + 1) * sizeof(int)); - - for (i = 0;i < ret->nbStates;i++) { - int stateno, atomno, targetno, prev; - xmlRegStatePtr state; - xmlRegTransPtr trans; - - stateno = stateRemap[i]; - if (stateno == -1) - continue; - state = ret->states[i]; - - transitions[stateno * (nbatoms + 1)] = state->type; - - for (j = 0;j < state->nbTrans;j++) { - trans = &(state->trans[j]); - if ((trans->to == -1) || (trans->atom == NULL)) - continue; - atomno = stringRemap[trans->atom->no]; - if ((trans->atom->data != NULL) && (transdata == NULL)) { - transdata = (void **) xmlMalloc(nbstates * nbatoms * - sizeof(void *)); - if (transdata != NULL) - memset(transdata, 0, - nbstates * nbatoms * sizeof(void *)); - } - targetno = stateRemap[trans->to]; - /* - * if the same atome can generate transition to 2 different - * states then it means the automata is not determinist and - * the compact form can't be used ! - */ - prev = transitions[stateno * (nbatoms + 1) + atomno + 1]; - if (prev != 0) { - if (prev != targetno + 1) { - printf("not determinist\n"); - ret->determinist = 0; -#ifdef DEBUG_COMPACTION - printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n", - i, j, trans->atom->no, trans->to, atomno, targetno); - printf(" previous to is %d\n", prev); -#endif - ret->determinist = 0; - if (transdata != NULL) - xmlFree(transdata); - xmlFree(transitions); - xmlFree(stateRemap); - xmlFree(stringRemap); - for (i = 0;i < nbatoms;i++) - xmlFree(stringMap[i]); - xmlFree(stringMap); - goto fail_compact; - } - } else { -#if 0 - printf("State %d trans %d: atom %d to %d : %d to %d\n", - i, j, trans->atom->no, trans->to, atomno, targetno); -#endif - transitions[stateno * (nbatoms + 1) + atomno + 1] = - targetno + 1; /* to avoid 0 */ - if (transdata != NULL) - transdata[stateno * nbatoms + atomno] = - trans->atom->data; - } - } - } - ret->determinist = 1; -#ifdef DEBUG_COMPACTION - /* - * Debug - */ - for (i = 0;i < nbstates;i++) { - for (j = 0;j < nbatoms + 1;j++) { - printf("%02d ", transitions[i * (nbatoms + 1) + j]); - } - printf("\n"); - } - printf("\n"); -#endif - /* - * Cleanup of the old data - */ - if (ret->states != NULL) { - for (i = 0;i < ret->nbStates;i++) - xmlRegFreeState(ret->states[i]); - xmlFree(ret->states); - } - ret->states = NULL; - ret->nbStates = 0; - if (ret->atoms != NULL) { - for (i = 0;i < ret->nbAtoms;i++) - xmlRegFreeAtom(ret->atoms[i]); - xmlFree(ret->atoms); - } - ret->atoms = NULL; - ret->nbAtoms = 0; - - ret->compact = transitions; - ret->transdata = transdata; - ret->stringMap = stringMap; - ret->nbstrings = nbatoms; - ret->nbstates = nbstates; - xmlFree(stateRemap); - xmlFree(stringRemap); - } -fail_compact: - return(ret); -} - -/** - * xmlRegNewParserCtxt: - * @string: the string to parse - * - * Allocate a new regexp parser context - * - * Returns the new context or NULL in case of error - */ -static xmlRegParserCtxtPtr -xmlRegNewParserCtxt(const xmlChar *string) { - xmlRegParserCtxtPtr ret; - - ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt)); - if (ret == NULL) - return(NULL); - memset(ret, 0, sizeof(xmlRegParserCtxt)); - if (string != NULL) - ret->string = xmlStrdup(string); - ret->cur = ret->string; - ret->neg = 0; - ret->error = 0; - ret->determinist = -1; - return(ret); -} - -/** - * xmlRegNewRange: - * @ctxt: the regexp parser context - * @neg: is that negative - * @type: the type of range - * @start: the start codepoint - * @end: the end codepoint - * - * Allocate a new regexp range - * - * Returns the new range or NULL in case of error - */ -static xmlRegRangePtr -xmlRegNewRange(xmlRegParserCtxtPtr ctxt, - int neg, xmlRegAtomType type, int start, int end) { - xmlRegRangePtr ret; - - ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange)); - if (ret == NULL) { - ERROR("failed to allocate regexp range"); - return(NULL); - } - ret->neg = neg; - ret->type = type; - ret->start = start; - ret->end = end; - return(ret); -} - -/** - * xmlRegFreeRange: - * @range: the regexp range - * - * Free a regexp range - */ -static void -xmlRegFreeRange(xmlRegRangePtr range) { - if (range == NULL) - return; - - if (range->blockName != NULL) - xmlFree(range->blockName); - xmlFree(range); -} - -/** - * xmlRegNewAtom: - * @ctxt: the regexp parser context - * @type: the type of atom - * - * Allocate a new regexp range - * - * Returns the new atom or NULL in case of error - */ -static xmlRegAtomPtr -xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) { - xmlRegAtomPtr ret; - - ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom)); - if (ret == NULL) { - ERROR("failed to allocate regexp atom"); - return(NULL); - } - memset(ret, 0, sizeof(xmlRegAtom)); - ret->type = type; - ret->quant = XML_REGEXP_QUANT_ONCE; - ret->min = 0; - ret->max = 0; - return(ret); -} - -/** - * xmlRegFreeAtom: - * @atom: the regexp atom - * - * Free a regexp atom - */ -static void -xmlRegFreeAtom(xmlRegAtomPtr atom) { - int i; - - if (atom == NULL) - return; - - for (i = 0;i < atom->nbRanges;i++) - xmlRegFreeRange(atom->ranges[i]); - if (atom->ranges != NULL) - xmlFree(atom->ranges); - if (atom->type == XML_REGEXP_STRING) - xmlFree(atom->valuep); - xmlFree(atom); -} - -static xmlRegStatePtr -xmlRegNewState(xmlRegParserCtxtPtr ctxt) { - xmlRegStatePtr ret; - - ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState)); - if (ret == NULL) { - ERROR("failed to allocate regexp state"); - return(NULL); - } - memset(ret, 0, sizeof(xmlRegState)); - ret->type = XML_REGEXP_TRANS_STATE; - ret->mark = XML_REGEXP_MARK_NORMAL; - return(ret); -} - -/** - * xmlRegFreeState: - * @state: the regexp state - * - * Free a regexp state - */ -static void -xmlRegFreeState(xmlRegStatePtr state) { - if (state == NULL) - return; - - if (state->trans != NULL) - xmlFree(state->trans); - xmlFree(state); -} - -/** - * xmlRegFreeParserCtxt: - * @ctxt: the regexp parser context - * - * Free a regexp parser context - */ -static void -xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) { - int i; - if (ctxt == NULL) - return; - - if (ctxt->string != NULL) - xmlFree(ctxt->string); - if (ctxt->states != NULL) { - for (i = 0;i < ctxt->nbStates;i++) - xmlRegFreeState(ctxt->states[i]); - xmlFree(ctxt->states); - } - if (ctxt->atoms != NULL) { - for (i = 0;i < ctxt->nbAtoms;i++) - xmlRegFreeAtom(ctxt->atoms[i]); - xmlFree(ctxt->atoms); - } - if (ctxt->counters != NULL) - xmlFree(ctxt->counters); - xmlFree(ctxt); -} - -/************************************************************************ - * * - * Display of Data structures * - * * - ************************************************************************/ - -static void -xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) { - switch (type) { - case XML_REGEXP_EPSILON: - fprintf(output, "epsilon "); break; - case XML_REGEXP_CHARVAL: - fprintf(output, "charval "); break; - case XML_REGEXP_RANGES: - fprintf(output, "ranges "); break; - case XML_REGEXP_SUBREG: - fprintf(output, "subexpr "); break; - case XML_REGEXP_STRING: - fprintf(output, "string "); break; - case XML_REGEXP_ANYCHAR: - fprintf(output, "anychar "); break; - case XML_REGEXP_ANYSPACE: - fprintf(output, "anyspace "); break; - case XML_REGEXP_NOTSPACE: - fprintf(output, "notspace "); break; - case XML_REGEXP_INITNAME: - fprintf(output, "initname "); break; - case XML_REGEXP_NOTINITNAME: - fprintf(output, "notinitname "); break; - case XML_REGEXP_NAMECHAR: - fprintf(output, "namechar "); break; - case XML_REGEXP_NOTNAMECHAR: - fprintf(output, "notnamechar "); break; - case XML_REGEXP_DECIMAL: - fprintf(output, "decimal "); break; - case XML_REGEXP_NOTDECIMAL: - fprintf(output, "notdecimal "); break; - case XML_REGEXP_REALCHAR: - fprintf(output, "realchar "); break; - case XML_REGEXP_NOTREALCHAR: - fprintf(output, "notrealchar "); break; - case XML_REGEXP_LETTER: - fprintf(output, "LETTER "); break; - case XML_REGEXP_LETTER_UPPERCASE: - fprintf(output, "LETTER_UPPERCASE "); break; - case XML_REGEXP_LETTER_LOWERCASE: - fprintf(output, "LETTER_LOWERCASE "); break; - case XML_REGEXP_LETTER_TITLECASE: - fprintf(output, "LETTER_TITLECASE "); break; - case XML_REGEXP_LETTER_MODIFIER: - fprintf(output, "LETTER_MODIFIER "); break; - case XML_REGEXP_LETTER_OTHERS: - fprintf(output, "LETTER_OTHERS "); break; - case XML_REGEXP_MARK: - fprintf(output, "MARK "); break; - case XML_REGEXP_MARK_NONSPACING: - fprintf(output, "MARK_NONSPACING "); break; - case XML_REGEXP_MARK_SPACECOMBINING: - fprintf(output, "MARK_SPACECOMBINING "); break; - case XML_REGEXP_MARK_ENCLOSING: - fprintf(output, "MARK_ENCLOSING "); break; - case XML_REGEXP_NUMBER: - fprintf(output, "NUMBER "); break; - case XML_REGEXP_NUMBER_DECIMAL: - fprintf(output, "NUMBER_DECIMAL "); break; - case XML_REGEXP_NUMBER_LETTER: - fprintf(output, "NUMBER_LETTER "); break; - case XML_REGEXP_NUMBER_OTHERS: - fprintf(output, "NUMBER_OTHERS "); break; - case XML_REGEXP_PUNCT: - fprintf(output, "PUNCT "); break; - case XML_REGEXP_PUNCT_CONNECTOR: - fprintf(output, "PUNCT_CONNECTOR "); break; - case XML_REGEXP_PUNCT_DASH: - fprintf(output, "PUNCT_DASH "); break; - case XML_REGEXP_PUNCT_OPEN: - fprintf(output, "PUNCT_OPEN "); break; - case XML_REGEXP_PUNCT_CLOSE: - fprintf(output, "PUNCT_CLOSE "); break; - case XML_REGEXP_PUNCT_INITQUOTE: - fprintf(output, "PUNCT_INITQUOTE "); break; - case XML_REGEXP_PUNCT_FINQUOTE: - fprintf(output, "PUNCT_FINQUOTE "); break; - case XML_REGEXP_PUNCT_OTHERS: - fprintf(output, "PUNCT_OTHERS "); break; - case XML_REGEXP_SEPAR: - fprintf(output, "SEPAR "); break; - case XML_REGEXP_SEPAR_SPACE: - fprintf(output, "SEPAR_SPACE "); break; - case XML_REGEXP_SEPAR_LINE: - fprintf(output, "SEPAR_LINE "); break; - case XML_REGEXP_SEPAR_PARA: - fprintf(output, "SEPAR_PARA "); break; - case XML_REGEXP_SYMBOL: - fprintf(output, "SYMBOL "); break; - case XML_REGEXP_SYMBOL_MATH: - fprintf(output, "SYMBOL_MATH "); break; - case XML_REGEXP_SYMBOL_CURRENCY: - fprintf(output, "SYMBOL_CURRENCY "); break; - case XML_REGEXP_SYMBOL_MODIFIER: - fprintf(output, "SYMBOL_MODIFIER "); break; - case XML_REGEXP_SYMBOL_OTHERS: - fprintf(output, "SYMBOL_OTHERS "); break; - case XML_REGEXP_OTHER: - fprintf(output, "OTHER "); break; - case XML_REGEXP_OTHER_CONTROL: - fprintf(output, "OTHER_CONTROL "); break; - case XML_REGEXP_OTHER_FORMAT: - fprintf(output, "OTHER_FORMAT "); break; - case XML_REGEXP_OTHER_PRIVATE: - fprintf(output, "OTHER_PRIVATE "); break; - case XML_REGEXP_OTHER_NA: - fprintf(output, "OTHER_NA "); break; - case XML_REGEXP_BLOCK_NAME: - fprintf(output, "BLOCK "); break; - } -} - -static void -xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) { - switch (type) { - case XML_REGEXP_QUANT_EPSILON: - fprintf(output, "epsilon "); break; - case XML_REGEXP_QUANT_ONCE: - fprintf(output, "once "); break; - case XML_REGEXP_QUANT_OPT: - fprintf(output, "? "); break; - case XML_REGEXP_QUANT_MULT: - fprintf(output, "* "); break; - case XML_REGEXP_QUANT_PLUS: - fprintf(output, "+ "); break; - case XML_REGEXP_QUANT_RANGE: - fprintf(output, "range "); break; - case XML_REGEXP_QUANT_ONCEONLY: - fprintf(output, "onceonly "); break; - case XML_REGEXP_QUANT_ALL: - fprintf(output, "all "); break; - } -} -static void -xmlRegPrintRange(FILE *output, xmlRegRangePtr range) { - fprintf(output, " range: "); - if (range->neg) - fprintf(output, "negative "); - xmlRegPrintAtomType(output, range->type); - fprintf(output, "%c - %c\n", range->start, range->end); -} - -static void -xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) { - fprintf(output, " atom: "); - if (atom == NULL) { - fprintf(output, "NULL\n"); - return; - } - xmlRegPrintAtomType(output, atom->type); - xmlRegPrintQuantType(output, atom->quant); - if (atom->quant == XML_REGEXP_QUANT_RANGE) - fprintf(output, "%d-%d ", atom->min, atom->max); - if (atom->type == XML_REGEXP_STRING) - fprintf(output, "'%s' ", (char *) atom->valuep); - if (atom->type == XML_REGEXP_CHARVAL) - fprintf(output, "char %c\n", atom->codepoint); - else if (atom->type == XML_REGEXP_RANGES) { - int i; - fprintf(output, "%d entries\n", atom->nbRanges); - for (i = 0; i < atom->nbRanges;i++) - xmlRegPrintRange(output, atom->ranges[i]); - } else if (atom->type == XML_REGEXP_SUBREG) { - fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no); - } else { - fprintf(output, "\n"); - } -} - -static void -xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) { - fprintf(output, " trans: "); - if (trans == NULL) { - fprintf(output, "NULL\n"); - return; - } - if (trans->to < 0) { - fprintf(output, "removed\n"); - return; - } - if (trans->counter >= 0) { - fprintf(output, "counted %d, ", trans->counter); - } - if (trans->count == REGEXP_ALL_COUNTER) { - fprintf(output, "all transition, "); - } else if (trans->count >= 0) { - fprintf(output, "count based %d, ", trans->count); - } - if (trans->atom == NULL) { - fprintf(output, "epsilon to %d\n", trans->to); - return; - } - if (trans->atom->type == XML_REGEXP_CHARVAL) - fprintf(output, "char %c ", trans->atom->codepoint); - fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to); -} - -static void -xmlRegPrintState(FILE *output, xmlRegStatePtr state) { - int i; - - fprintf(output, " state: "); - if (state == NULL) { - fprintf(output, "NULL\n"); - return; - } - if (state->type == XML_REGEXP_START_STATE) - fprintf(output, "START "); - if (state->type == XML_REGEXP_FINAL_STATE) - fprintf(output, "FINAL "); - - fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans); - for (i = 0;i < state->nbTrans; i++) { - xmlRegPrintTrans(output, &(state->trans[i])); - } -} - -#ifdef DEBUG_REGEXP_GRAPH -static void -xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) { - int i; - - fprintf(output, " ctxt: "); - if (ctxt == NULL) { - fprintf(output, "NULL\n"); - return; - } - fprintf(output, "'%s' ", ctxt->string); - if (ctxt->error) - fprintf(output, "error "); - if (ctxt->neg) - fprintf(output, "neg "); - fprintf(output, "\n"); - fprintf(output, "%d atoms:\n", ctxt->nbAtoms); - for (i = 0;i < ctxt->nbAtoms; i++) { - fprintf(output, " %02d ", i); - xmlRegPrintAtom(output, ctxt->atoms[i]); - } - if (ctxt->atom != NULL) { - fprintf(output, "current atom:\n"); - xmlRegPrintAtom(output, ctxt->atom); - } - fprintf(output, "%d states:", ctxt->nbStates); - if (ctxt->start != NULL) - fprintf(output, " start: %d", ctxt->start->no); - if (ctxt->end != NULL) - fprintf(output, " end: %d", ctxt->end->no); - fprintf(output, "\n"); - for (i = 0;i < ctxt->nbStates; i++) { - xmlRegPrintState(output, ctxt->states[i]); - } - fprintf(output, "%d counters:\n", ctxt->nbCounters); - for (i = 0;i < ctxt->nbCounters; i++) { - fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min, - ctxt->counters[i].max); - } -} -#endif - -/************************************************************************ - * * - * Finite Automata structures manipulations * - * * - ************************************************************************/ - -static void -xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom, - int neg, xmlRegAtomType type, int start, int end, - xmlChar *blockName) { - xmlRegRangePtr range; - - if (atom == NULL) { - ERROR("add range: atom is NULL"); - return; - } - if (atom->type != XML_REGEXP_RANGES) { - ERROR("add range: atom is not ranges"); - return; - } - if (atom->maxRanges == 0) { - atom->maxRanges = 4; - atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges * - sizeof(xmlRegRangePtr)); - if (atom->ranges == NULL) { - ERROR("add range: allocation failed"); - atom->maxRanges = 0; - return; - } - } else if (atom->nbRanges >= atom->maxRanges) { - xmlRegRangePtr *tmp; - atom->maxRanges *= 2; - tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges * - sizeof(xmlRegRangePtr)); - if (tmp == NULL) { - ERROR("add range: allocation failed"); - atom->maxRanges /= 2; - return; - } - atom->ranges = tmp; - } - range = xmlRegNewRange(ctxt, neg, type, start, end); - if (range == NULL) - return; - range->blockName = blockName; - atom->ranges[atom->nbRanges++] = range; - -} - -static int -xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) { - if (ctxt->maxCounters == 0) { - ctxt->maxCounters = 4; - ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters * - sizeof(xmlRegCounter)); - if (ctxt->counters == NULL) { - ERROR("reg counter: allocation failed"); - ctxt->maxCounters = 0; - return(-1); - } - } else if (ctxt->nbCounters >= ctxt->maxCounters) { - xmlRegCounter *tmp; - ctxt->maxCounters *= 2; - tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters * - sizeof(xmlRegCounter)); - if (tmp == NULL) { - ERROR("reg counter: allocation failed"); - ctxt->maxCounters /= 2; - return(-1); - } - ctxt->counters = tmp; - } - ctxt->counters[ctxt->nbCounters].min = -1; - ctxt->counters[ctxt->nbCounters].max = -1; - return(ctxt->nbCounters++); -} - -static void -xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) { - if (atom == NULL) { - ERROR("atom push: atom is NULL"); - return; - } - if (ctxt->maxAtoms == 0) { - ctxt->maxAtoms = 4; - ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms * - sizeof(xmlRegAtomPtr)); - if (ctxt->atoms == NULL) { - ERROR("atom push: allocation failed"); - ctxt->maxAtoms = 0; - return; - } - } else if (ctxt->nbAtoms >= ctxt->maxAtoms) { - xmlRegAtomPtr *tmp; - ctxt->maxAtoms *= 2; - tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms * - sizeof(xmlRegAtomPtr)); - if (tmp == NULL) { - ERROR("atom push: allocation failed"); - ctxt->maxAtoms /= 2; - return; - } - ctxt->atoms = tmp; - } - atom->no = ctxt->nbAtoms; - ctxt->atoms[ctxt->nbAtoms++] = atom; -} - -static void -xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, - xmlRegAtomPtr atom, xmlRegStatePtr target, - int counter, int count) { - if (state == NULL) { - ERROR("add state: state is NULL"); - return; - } - if (target == NULL) { - ERROR("add state: target is NULL"); - return; - } - if (state->maxTrans == 0) { - state->maxTrans = 4; - state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans * - sizeof(xmlRegTrans)); - if (state->trans == NULL) { - ERROR("add range: allocation failed"); - state->maxTrans = 0; - return; - } - } else if (state->nbTrans >= state->maxTrans) { - xmlRegTrans *tmp; - state->maxTrans *= 2; - tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans * - sizeof(xmlRegTrans)); - if (tmp == NULL) { - ERROR("add range: allocation failed"); - state->maxTrans /= 2; - return; - } - state->trans = tmp; - } -#ifdef DEBUG_REGEXP_GRAPH - printf("Add trans from %d to %d ", state->no, target->no); - if (count == REGEXP_ALL_COUNTER) - printf("all transition"); - else if (count >= 0) - printf("count based %d", count); - else if (counter >= 0) - printf("counted %d", counter); - else if (atom == NULL) - printf("epsilon transition"); - printf("\n"); -#endif - - state->trans[state->nbTrans].atom = atom; - state->trans[state->nbTrans].to = target->no; - state->trans[state->nbTrans].counter = counter; - state->trans[state->nbTrans].count = count; - state->nbTrans++; -} - -static void -xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) { - if (ctxt->maxStates == 0) { - ctxt->maxStates = 4; - ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates * - sizeof(xmlRegStatePtr)); - if (ctxt->states == NULL) { - ERROR("add range: allocation failed"); - ctxt->maxStates = 0; - return; - } - } else if (ctxt->nbStates >= ctxt->maxStates) { - xmlRegStatePtr *tmp; - ctxt->maxStates *= 2; - tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates * - sizeof(xmlRegStatePtr)); - if (tmp == NULL) { - ERROR("add range: allocation failed"); - ctxt->maxStates /= 2; - return; - } - ctxt->states = tmp; - } - state->no = ctxt->nbStates; - ctxt->states[ctxt->nbStates++] = state; -} - -/** - * xmlFAGenerateAllTransition: - * @ctxt: a regexp parser context - * @from: the from state - * @to: the target state or NULL for building a new one - * @lax: - * - */ -static void -xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt, - xmlRegStatePtr from, xmlRegStatePtr to, - int lax) { - if (to == NULL) { - to = xmlRegNewState(ctxt); - xmlRegStatePush(ctxt, to); - ctxt->state = to; - } - if (lax) - xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER); - else - xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER); -} - -/** - * xmlFAGenerateEpsilonTransition: - * @ctxt: a regexp parser context - * @from: the from state - * @to: the target state or NULL for building a new one - * - */ -static void -xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt, - xmlRegStatePtr from, xmlRegStatePtr to) { - if (to == NULL) { - to = xmlRegNewState(ctxt); - xmlRegStatePush(ctxt, to); - ctxt->state = to; - } - xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1); -} - -/** - * xmlFAGenerateCountedEpsilonTransition: - * @ctxt: a regexp parser context - * @from: the from state - * @to: the target state or NULL for building a new one - * counter: the counter for that transition - * - */ -static void -xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt, - xmlRegStatePtr from, xmlRegStatePtr to, int counter) { - if (to == NULL) { - to = xmlRegNewState(ctxt); - xmlRegStatePush(ctxt, to); - ctxt->state = to; - } - xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1); -} - -/** - * xmlFAGenerateCountedTransition: - * @ctxt: a regexp parser context - * @from: the from state - * @to: the target state or NULL for building a new one - * counter: the counter for that transition - * - */ -static void -xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt, - xmlRegStatePtr from, xmlRegStatePtr to, int counter) { - if (to == NULL) { - to = xmlRegNewState(ctxt); - xmlRegStatePush(ctxt, to); - ctxt->state = to; - } - xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter); -} - -/** - * xmlFAGenerateTransitions: - * @ctxt: a regexp parser context - * @from: the from state - * @to: the target state or NULL for building a new one - * @atom: the atom generating the transition - * - */ -static void -xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, - xmlRegStatePtr to, xmlRegAtomPtr atom) { - if (atom == NULL) { - ERROR("genrate transition: atom == NULL"); - return; - } - if (atom->type == XML_REGEXP_SUBREG) { - /* - * this is a subexpression handling one should not need to - * create a new node excep for XML_REGEXP_QUANT_RANGE. - */ - xmlRegAtomPush(ctxt, atom); - if ((to != NULL) && (atom->stop != to) && - (atom->quant != XML_REGEXP_QUANT_RANGE)) { - /* - * Generate an epsilon transition to link to the target - */ - xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to); - } - switch (atom->quant) { - case XML_REGEXP_QUANT_OPT: - atom->quant = XML_REGEXP_QUANT_ONCE; - xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop); - break; - case XML_REGEXP_QUANT_MULT: - atom->quant = XML_REGEXP_QUANT_ONCE; - xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop); - xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start); - break; - case XML_REGEXP_QUANT_PLUS: - atom->quant = XML_REGEXP_QUANT_ONCE; - xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start); - break; - case XML_REGEXP_QUANT_RANGE: { - int counter; - xmlRegStatePtr newstate; - - /* - * This one is nasty: - * 1/ register a new counter - * 2/ register an epsilon transition associated to - * this counter going from atom->stop to atom->start - * 3/ create a new state - * 4/ generate a counted transition from atom->stop to - * that state - */ - counter = xmlRegGetCounter(ctxt); - ctxt->counters[counter].min = atom->min - 1; - ctxt->counters[counter].max = atom->max - 1; - atom->min = 0; - atom->max = 0; - atom->quant = XML_REGEXP_QUANT_ONCE; - xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop, - atom->start, counter); - if (to != NULL) { - newstate = to; - } else { - newstate = xmlRegNewState(ctxt); - xmlRegStatePush(ctxt, newstate); - ctxt->state = newstate; - } - xmlFAGenerateCountedTransition(ctxt, atom->stop, - newstate, counter); - } - default: - break; - } - return; - } else { - if (to == NULL) { - to = xmlRegNewState(ctxt); - xmlRegStatePush(ctxt, to); - } - xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1); - xmlRegAtomPush(ctxt, atom); - ctxt->state = to; - } - switch (atom->quant) { - case XML_REGEXP_QUANT_OPT: - atom->quant = XML_REGEXP_QUANT_ONCE; - xmlFAGenerateEpsilonTransition(ctxt, from, to); - break; - case XML_REGEXP_QUANT_MULT: - atom->quant = XML_REGEXP_QUANT_ONCE; - xmlFAGenerateEpsilonTransition(ctxt, from, to); - xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); - break; - case XML_REGEXP_QUANT_PLUS: - atom->quant = XML_REGEXP_QUANT_ONCE; - xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); - break; - default: - break; - } -} - -/** - * xmlFAReduceEpsilonTransitions: - * @ctxt: a regexp parser context - * @fromnr: the from state - * @tonr: the to state - * @cpunter: should that transition be associted to a counted - * - */ -static void -xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr, - int tonr, int counter) { - int transnr; - xmlRegStatePtr from; - xmlRegStatePtr to; - -#ifdef DEBUG_REGEXP_GRAPH - printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr); -#endif - from = ctxt->states[fromnr]; - if (from == NULL) - return; - to = ctxt->states[tonr]; - if (to == NULL) - return; - if ((to->mark == XML_REGEXP_MARK_START) || - (to->mark == XML_REGEXP_MARK_VISITED)) - return; - - to->mark = XML_REGEXP_MARK_VISITED; - if (to->type == XML_REGEXP_FINAL_STATE) { -#ifdef DEBUG_REGEXP_GRAPH - printf("State %d is final, so %d becomes final\n", tonr, fromnr); -#endif - from->type = XML_REGEXP_FINAL_STATE; - } - for (transnr = 0;transnr < to->nbTrans;transnr++) { - if (to->trans[transnr].atom == NULL) { - /* - * Don't remove counted transitions - * Don't loop either - */ - if (to->trans[transnr].to != fromnr) { - if (to->trans[transnr].count >= 0) { - int newto = to->trans[transnr].to; - - xmlRegStateAddTrans(ctxt, from, NULL, - ctxt->states[newto], - -1, to->trans[transnr].count); - } else { -#ifdef DEBUG_REGEXP_GRAPH - printf("Found epsilon trans %d from %d to %d\n", - transnr, tonr, to->trans[transnr].to); -#endif - if (to->trans[transnr].counter >= 0) { - xmlFAReduceEpsilonTransitions(ctxt, fromnr, - to->trans[transnr].to, - to->trans[transnr].counter); - } else { - xmlFAReduceEpsilonTransitions(ctxt, fromnr, - to->trans[transnr].to, - counter); - } - } - } - } else { - int newto = to->trans[transnr].to; - - if (to->trans[transnr].counter >= 0) { - xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, - ctxt->states[newto], - to->trans[transnr].counter, -1); - } else { - xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, - ctxt->states[newto], counter, -1); - } - } - } - to->mark = XML_REGEXP_MARK_NORMAL; -} - -/** - * xmlFAEliminateEpsilonTransitions: - * @ctxt: a regexp parser context - * - */ -static void -xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { - int statenr, transnr; - xmlRegStatePtr state; - - /* - * build the completed transitions bypassing the epsilons - * Use a marking algorithm to avoid loops - */ - for (statenr = 0;statenr < ctxt->nbStates;statenr++) { - state = ctxt->states[statenr]; - if (state == NULL) - continue; - for (transnr = 0;transnr < state->nbTrans;transnr++) { - if ((state->trans[transnr].atom == NULL) && - (state->trans[transnr].to >= 0)) { - if (state->trans[transnr].to == statenr) { - state->trans[transnr].to = -1; -#ifdef DEBUG_REGEXP_GRAPH - printf("Removed loopback epsilon trans %d on %d\n", - transnr, statenr); -#endif - } else if (state->trans[transnr].count < 0) { - int newto = state->trans[transnr].to; - -#ifdef DEBUG_REGEXP_GRAPH - printf("Found epsilon trans %d from %d to %d\n", - transnr, statenr, newto); -#endif - state->mark = XML_REGEXP_MARK_START; - xmlFAReduceEpsilonTransitions(ctxt, statenr, - newto, state->trans[transnr].counter); - state->mark = XML_REGEXP_MARK_NORMAL; -#ifdef DEBUG_REGEXP_GRAPH - } else { - printf("Found counted transition %d on %d\n", - transnr, statenr); -#endif - } - } - } - } - /* - * Eliminate the epsilon transitions - */ - for (statenr = 0;statenr < ctxt->nbStates;statenr++) { - state = ctxt->states[statenr]; - if (state == NULL) - continue; - for (transnr = 0;transnr < state->nbTrans;transnr++) { - if ((state->trans[transnr].atom == NULL) && - (state->trans[transnr].count < 0) && - (state->trans[transnr].to >= 0)) { - state->trans[transnr].to = -1; - } - } - } - - /* - * Use this pass to detect unreachable states too - */ - for (statenr = 0;statenr < ctxt->nbStates;statenr++) { - state = ctxt->states[statenr]; - if (state != NULL) - state->reached = 0; - } - state = ctxt->states[0]; - if (state != NULL) - state->reached = 1; - while (state != NULL) { - xmlRegStatePtr target = NULL; - state->reached = 2; - /* - * Mark all state reachable from the current reachable state - */ - for (transnr = 0;transnr < state->nbTrans;transnr++) { - if ((state->trans[transnr].to >= 0) && - ((state->trans[transnr].atom != NULL) || - (state->trans[transnr].count >= 0))) { - int newto = state->trans[transnr].to; - - if (ctxt->states[newto] == NULL) - continue; - if (ctxt->states[newto]->reached == 0) { - ctxt->states[newto]->reached = 1; - target = ctxt->states[newto]; - } - } - } - /* - * find the next accessible state not explored - */ - if (target == NULL) { - for (statenr = 1;statenr < ctxt->nbStates;statenr++) { - state = ctxt->states[statenr]; - if ((state != NULL) && (state->reached == 1)) { - target = state; - break; - } - } - } - state = target; - } - for (statenr = 0;statenr < ctxt->nbStates;statenr++) { - state = ctxt->states[statenr]; - if ((state != NULL) && (state->reached == 0)) { -#ifdef DEBUG_REGEXP_GRAPH - printf("Removed unreachable state %d\n", statenr); -#endif - xmlRegFreeState(state); - ctxt->states[statenr] = NULL; - } - } - -} - -/** - * xmlFACompareAtoms: - * @atom1: an atom - * @atom2: an atom - * - * Compares two atoms to check whether they are equivatents - * - * Returns 1 if yes and 0 otherwise - */ -static int -xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) { - if (atom1 == atom2) - return(1); - if ((atom1 == NULL) || (atom2 == NULL)) - return(0); - - if (atom1->type != atom2->type) - return(0); - switch (atom1->type) { - case XML_REGEXP_STRING: - return(xmlStrEqual((xmlChar *)atom1->valuep, - (xmlChar *)atom2->valuep)); - case XML_REGEXP_EPSILON: - return(1); - case XML_REGEXP_CHARVAL: - return(atom1->codepoint == atom2->codepoint); - case XML_REGEXP_RANGES: - TODO; - return(0); - default: - break; - } - return(1); -} - -/** - * xmlFARecurseDeterminism: - * @ctxt: a regexp parser context - * - * Check whether the associated regexp is determinist, - * should be called after xmlFAEliminateEpsilonTransitions() - * - */ -static int -xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, - int to, xmlRegAtomPtr atom) { - int ret = 1; - int transnr; - xmlRegTransPtr t1; - - if (state == NULL) - return(ret); - for (transnr = 0;transnr < state->nbTrans;transnr++) { - t1 = &(state->trans[transnr]); - /* - * check transitions conflicting with the one looked at - */ - if (t1->atom == NULL) { - if (t1->to == -1) - continue; - ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], - to, atom); - if (ret == 0) - return(0); - continue; - } - if (t1->to != to) - continue; - if (xmlFACompareAtoms(t1->atom, atom)) - return(0); - } - return(ret); -} - -/** - * xmlFAComputesDeterminism: - * @ctxt: a regexp parser context - * - * Check whether the associated regexp is determinist, - * should be called after xmlFAEliminateEpsilonTransitions() - * - */ -static int -xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) { - int statenr, transnr; - xmlRegStatePtr state; - xmlRegTransPtr t1, t2; - int i; - int ret = 1; - -#ifdef DEBUG_REGEXP_GRAPH - printf("xmlFAComputesDeterminism\n"); - xmlRegPrintCtxt(stdout, ctxt); -#endif - if (ctxt->determinist != -1) - return(ctxt->determinist); - - /* - * Check for all states that there isn't 2 transitions - * with the same atom and a different target. - */ - for (statenr = 0;statenr < ctxt->nbStates;statenr++) { - state = ctxt->states[statenr]; - if (state == NULL) - continue; - for (transnr = 0;transnr < state->nbTrans;transnr++) { - t1 = &(state->trans[transnr]); - /* - * Determinism checks in case of counted or all transitions - * will have to be handled separately - */ - if (t1->atom == NULL) - continue; - if (t1->to == -1) /* eliminated */ - continue; - for (i = 0;i < transnr;i++) { - t2 = &(state->trans[i]); - if (t2->to == -1) /* eliminated */ - continue; - if (t2->atom != NULL) { - if (t1->to == t2->to) { - if (xmlFACompareAtoms(t1->atom, t2->atom)) - t2->to = -1; /* eliminate */ - } else { - /* not determinist ! */ - if (xmlFACompareAtoms(t1->atom, t2->atom)) - ret = 0; - } - } else if (t1->to != -1) { - /* - * do the closure in case of remaining specific - * epsilon transitions like choices or all - */ - ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], - t2->to, t2->atom); - if (ret == 0) - return(0); - } - } - if (ret == 0) - break; - } - if (ret == 0) - break; - } - ctxt->determinist = ret; - return(ret); -} - -/************************************************************************ - * * - * Routines to check input against transition atoms * - * * - ************************************************************************/ - -static int -xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg, - int start, int end, const xmlChar *blockName) { - int ret = 0; - - switch (type) { - case XML_REGEXP_STRING: - case XML_REGEXP_SUBREG: - case XML_REGEXP_RANGES: - case XML_REGEXP_EPSILON: - return(-1); - case XML_REGEXP_ANYCHAR: - ret = ((codepoint != '\n') && (codepoint != '\r')); - break; - case XML_REGEXP_CHARVAL: - ret = ((codepoint >= start) && (codepoint <= end)); - break; - case XML_REGEXP_NOTSPACE: - neg = !neg; - case XML_REGEXP_ANYSPACE: - ret = ((codepoint == '\n') || (codepoint == '\r') || - (codepoint == '\t') || (codepoint == ' ')); - break; - case XML_REGEXP_NOTINITNAME: - neg = !neg; - case XML_REGEXP_INITNAME: - ret = (xmlIsLetter(codepoint) || - (codepoint == '_') || (codepoint == ':')); - break; - case XML_REGEXP_NOTNAMECHAR: - neg = !neg; - case XML_REGEXP_NAMECHAR: - ret = (xmlIsLetter(codepoint) || xmlIsDigit(codepoint) || - (codepoint == '.') || (codepoint == '-') || - (codepoint == '_') || (codepoint == ':') || - xmlIsCombining(codepoint) || xmlIsExtender(codepoint)); - break; - case XML_REGEXP_NOTDECIMAL: - neg = !neg; - case XML_REGEXP_DECIMAL: - ret = xmlUCSIsCatNd(codepoint); - break; - case XML_REGEXP_REALCHAR: - neg = !neg; - case XML_REGEXP_NOTREALCHAR: - ret = xmlUCSIsCatP(codepoint); - if (ret == 0) - ret = xmlUCSIsCatZ(codepoint); - if (ret == 0) - ret = xmlUCSIsCatC(codepoint); - break; - case XML_REGEXP_LETTER: - ret = xmlUCSIsCatL(codepoint); - break; - case XML_REGEXP_LETTER_UPPERCASE: - ret = xmlUCSIsCatLu(codepoint); - break; - case XML_REGEXP_LETTER_LOWERCASE: - ret = xmlUCSIsCatLl(codepoint); - break; - case XML_REGEXP_LETTER_TITLECASE: - ret = xmlUCSIsCatLt(codepoint); - break; - case XML_REGEXP_LETTER_MODIFIER: - ret = xmlUCSIsCatLm(codepoint); - break; - case XML_REGEXP_LETTER_OTHERS: - ret = xmlUCSIsCatLo(codepoint); - break; - case XML_REGEXP_MARK: - ret = xmlUCSIsCatM(codepoint); - break; - case XML_REGEXP_MARK_NONSPACING: - ret = xmlUCSIsCatMn(codepoint); - break; - case XML_REGEXP_MARK_SPACECOMBINING: - ret = xmlUCSIsCatMc(codepoint); - break; - case XML_REGEXP_MARK_ENCLOSING: - ret = xmlUCSIsCatMe(codepoint); - break; - case XML_REGEXP_NUMBER: - ret = xmlUCSIsCatN(codepoint); - break; - case XML_REGEXP_NUMBER_DECIMAL: - ret = xmlUCSIsCatNd(codepoint); - break; - case XML_REGEXP_NUMBER_LETTER: - ret = xmlUCSIsCatNl(codepoint); - break; - case XML_REGEXP_NUMBER_OTHERS: - ret = xmlUCSIsCatNo(codepoint); - break; - case XML_REGEXP_PUNCT: - ret = xmlUCSIsCatP(codepoint); - break; - case XML_REGEXP_PUNCT_CONNECTOR: - ret = xmlUCSIsCatPc(codepoint); - break; - case XML_REGEXP_PUNCT_DASH: - ret = xmlUCSIsCatPd(codepoint); - break; - case XML_REGEXP_PUNCT_OPEN: - ret = xmlUCSIsCatPs(codepoint); - break; - case XML_REGEXP_PUNCT_CLOSE: - ret = xmlUCSIsCatPe(codepoint); - break; - case XML_REGEXP_PUNCT_INITQUOTE: - ret = xmlUCSIsCatPi(codepoint); - break; - case XML_REGEXP_PUNCT_FINQUOTE: - ret = xmlUCSIsCatPf(codepoint); - break; - case XML_REGEXP_PUNCT_OTHERS: - ret = xmlUCSIsCatPo(codepoint); - break; - case XML_REGEXP_SEPAR: - ret = xmlUCSIsCatZ(codepoint); - break; - case XML_REGEXP_SEPAR_SPACE: - ret = xmlUCSIsCatZs(codepoint); - break; - case XML_REGEXP_SEPAR_LINE: - ret = xmlUCSIsCatZl(codepoint); - break; - case XML_REGEXP_SEPAR_PARA: - ret = xmlUCSIsCatZp(codepoint); - break; - case XML_REGEXP_SYMBOL: - ret = xmlUCSIsCatS(codepoint); - break; - case XML_REGEXP_SYMBOL_MATH: - ret = xmlUCSIsCatSm(codepoint); - break; - case XML_REGEXP_SYMBOL_CURRENCY: - ret = xmlUCSIsCatSc(codepoint); - break; - case XML_REGEXP_SYMBOL_MODIFIER: - ret = xmlUCSIsCatSk(codepoint); - break; - case XML_REGEXP_SYMBOL_OTHERS: - ret = xmlUCSIsCatSo(codepoint); - break; - case XML_REGEXP_OTHER: - ret = xmlUCSIsCatC(codepoint); - break; - case XML_REGEXP_OTHER_CONTROL: - ret = xmlUCSIsCatCc(codepoint); - break; - case XML_REGEXP_OTHER_FORMAT: - ret = xmlUCSIsCatCf(codepoint); - break; - case XML_REGEXP_OTHER_PRIVATE: - ret = xmlUCSIsCatCo(codepoint); - break; - case XML_REGEXP_OTHER_NA: - /* ret = xmlUCSIsCatCn(codepoint); */ - /* Seems it doesn't exist anymore in recent Unicode releases */ - ret = 0; - break; - case XML_REGEXP_BLOCK_NAME: - ret = xmlUCSIsBlock(codepoint, (const char *) blockName); - break; - } - if (neg) - return(!ret); - return(ret); -} - -static int -xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) { - int i, ret = 0; - xmlRegRangePtr range; - - if ((atom == NULL) || (!xmlIsChar(codepoint))) - return(-1); - - switch (atom->type) { - case XML_REGEXP_SUBREG: - case XML_REGEXP_EPSILON: - return(-1); - case XML_REGEXP_CHARVAL: - return(codepoint == atom->codepoint); - case XML_REGEXP_RANGES: { - int accept = 0; - for (i = 0;i < atom->nbRanges;i++) { - range = atom->ranges[i]; - if (range->neg) { - ret = xmlRegCheckCharacterRange(range->type, codepoint, - 0, range->start, range->end, - range->blockName); - if (ret != 0) - return(0); /* excluded char */ - } else { - ret = xmlRegCheckCharacterRange(range->type, codepoint, - 0, range->start, range->end, - range->blockName); - if (ret != 0) - accept = 1; /* might still be excluded */ - } - } - return(accept); - } - case XML_REGEXP_STRING: - printf("TODO: XML_REGEXP_STRING\n"); - return(-1); - case XML_REGEXP_ANYCHAR: - case XML_REGEXP_ANYSPACE: - case XML_REGEXP_NOTSPACE: - case XML_REGEXP_INITNAME: - case XML_REGEXP_NOTINITNAME: - case XML_REGEXP_NAMECHAR: - case XML_REGEXP_NOTNAMECHAR: - case XML_REGEXP_DECIMAL: - case XML_REGEXP_NOTDECIMAL: - case XML_REGEXP_REALCHAR: - case XML_REGEXP_NOTREALCHAR: - case XML_REGEXP_LETTER: - case XML_REGEXP_LETTER_UPPERCASE: - case XML_REGEXP_LETTER_LOWERCASE: - case XML_REGEXP_LETTER_TITLECASE: - case XML_REGEXP_LETTER_MODIFIER: - case XML_REGEXP_LETTER_OTHERS: - case XML_REGEXP_MARK: - case XML_REGEXP_MARK_NONSPACING: - case XML_REGEXP_MARK_SPACECOMBINING: - case XML_REGEXP_MARK_ENCLOSING: - case XML_REGEXP_NUMBER: - case XML_REGEXP_NUMBER_DECIMAL: - case XML_REGEXP_NUMBER_LETTER: - case XML_REGEXP_NUMBER_OTHERS: - case XML_REGEXP_PUNCT: - case XML_REGEXP_PUNCT_CONNECTOR: - case XML_REGEXP_PUNCT_DASH: - case XML_REGEXP_PUNCT_OPEN: - case XML_REGEXP_PUNCT_CLOSE: - case XML_REGEXP_PUNCT_INITQUOTE: - case XML_REGEXP_PUNCT_FINQUOTE: - case XML_REGEXP_PUNCT_OTHERS: - case XML_REGEXP_SEPAR: - case XML_REGEXP_SEPAR_SPACE: - case XML_REGEXP_SEPAR_LINE: - case XML_REGEXP_SEPAR_PARA: - case XML_REGEXP_SYMBOL: - case XML_REGEXP_SYMBOL_MATH: - case XML_REGEXP_SYMBOL_CURRENCY: - case XML_REGEXP_SYMBOL_MODIFIER: - case XML_REGEXP_SYMBOL_OTHERS: - case XML_REGEXP_OTHER: - case XML_REGEXP_OTHER_CONTROL: - case XML_REGEXP_OTHER_FORMAT: - case XML_REGEXP_OTHER_PRIVATE: - case XML_REGEXP_OTHER_NA: - case XML_REGEXP_BLOCK_NAME: - ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0, - (const xmlChar *)atom->valuep); - if (atom->neg) - ret = !ret; - break; - } - return(ret); -} - -/************************************************************************ - * * - * Saving an restoring state of an execution context * - * * - ************************************************************************/ - -#ifdef DEBUG_REGEXP_EXEC -static void -xmlFARegDebugExec(xmlRegExecCtxtPtr exec) { - printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index); - if (exec->inputStack != NULL) { - int i; - printf(": "); - for (i = 0;(i < 3) && (i < exec->inputStackNr);i++) - printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]); - } else { - printf(": %s", &(exec->inputString[exec->index])); - } - printf("\n"); -} -#endif - -static void -xmlFARegExecSave(xmlRegExecCtxtPtr exec) { -#ifdef DEBUG_REGEXP_EXEC - printf("saving "); - exec->transno++; - xmlFARegDebugExec(exec); - exec->transno--; -#endif - - if (exec->maxRollbacks == 0) { - exec->maxRollbacks = 4; - exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks * - sizeof(xmlRegExecRollback)); - if (exec->rollbacks == NULL) { - fprintf(stderr, "exec save: allocation failed"); - exec->maxRollbacks = 0; - return; - } - memset(exec->rollbacks, 0, - exec->maxRollbacks * sizeof(xmlRegExecRollback)); - } else if (exec->nbRollbacks >= exec->maxRollbacks) { - xmlRegExecRollback *tmp; - int len = exec->maxRollbacks; - - exec->maxRollbacks *= 2; - tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks, - exec->maxRollbacks * sizeof(xmlRegExecRollback)); - if (tmp == NULL) { - fprintf(stderr, "exec save: allocation failed"); - exec->maxRollbacks /= 2; - return; - } - exec->rollbacks = tmp; - tmp = &exec->rollbacks[len]; - memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback)); - } - exec->rollbacks[exec->nbRollbacks].state = exec->state; - exec->rollbacks[exec->nbRollbacks].index = exec->index; - exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1; - if (exec->comp->nbCounters > 0) { - if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { - exec->rollbacks[exec->nbRollbacks].counts = (int *) - xmlMalloc(exec->comp->nbCounters * sizeof(int)); - if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { - fprintf(stderr, "exec save: allocation failed"); - exec->status = -5; - return; - } - } - memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts, - exec->comp->nbCounters * sizeof(int)); - } - exec->nbRollbacks++; -} - -static void -xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) { - if (exec->nbRollbacks <= 0) { - exec->status = -1; -#ifdef DEBUG_REGEXP_EXEC - printf("rollback failed on empty stack\n"); -#endif - return; - } - exec->nbRollbacks--; - exec->state = exec->rollbacks[exec->nbRollbacks].state; - exec->index = exec->rollbacks[exec->nbRollbacks].index; - exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch; - if (exec->comp->nbCounters > 0) { - if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { - fprintf(stderr, "exec save: allocation failed"); - exec->status = -6; - return; - } - memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts, - exec->comp->nbCounters * sizeof(int)); - } - -#ifdef DEBUG_REGEXP_EXEC - printf("restored "); - xmlFARegDebugExec(exec); -#endif -} - -/************************************************************************ - * * - * Verifyer, running an input against a compiled regexp * - * * - ************************************************************************/ - -static int -xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) { - xmlRegExecCtxt execval; - xmlRegExecCtxtPtr exec = &execval; - int ret, codepoint, len; - - exec->inputString = content; - exec->index = 0; - exec->determinist = 1; - exec->maxRollbacks = 0; - exec->nbRollbacks = 0; - exec->rollbacks = NULL; - exec->status = 0; - exec->comp = comp; - exec->state = comp->states[0]; - exec->transno = 0; - exec->transcount = 0; - if (comp->nbCounters > 0) { - exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)); - if (exec->counts == NULL) - return(-1); - memset(exec->counts, 0, comp->nbCounters * sizeof(int)); - } else - exec->counts = NULL; - while ((exec->status == 0) && - ((exec->inputString[exec->index] != 0) || - (exec->state->type != XML_REGEXP_FINAL_STATE))) { - xmlRegTransPtr trans; - xmlRegAtomPtr atom; - - /* - * End of input on non-terminal state, rollback, however we may - * still have epsilon like transition for counted transitions - * on counters, in that case don't break too early. - */ - if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) - goto rollback; - - exec->transcount = 0; - for (;exec->transno < exec->state->nbTrans;exec->transno++) { - trans = &exec->state->trans[exec->transno]; - if (trans->to < 0) - continue; - atom = trans->atom; - ret = 0; - if (trans->count >= 0) { - int count; - xmlRegCounterPtr counter; - - /* - * A counted transition. - */ - - count = exec->counts[trans->count]; - counter = &exec->comp->counters[trans->count]; -#ifdef DEBUG_REGEXP_EXEC - printf("testing count %d: val %d, min %d, max %d\n", - trans->count, count, counter->min, counter->max); -#endif - ret = ((count >= counter->min) && (count <= counter->max)); - } else if (atom == NULL) { - fprintf(stderr, "epsilon transition left at runtime\n"); - exec->status = -2; - break; - } else if (exec->inputString[exec->index] != 0) { - codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len); - ret = xmlRegCheckCharacter(atom, codepoint); - if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) { - xmlRegStatePtr to = comp->states[trans->to]; - - /* - * this is a multiple input sequence - */ - if (exec->state->nbTrans > exec->transno + 1) { - xmlFARegExecSave(exec); - } - exec->transcount = 1; - do { - /* - * Try to progress as much as possible on the input - */ - if (exec->transcount == atom->max) { - break; - } - exec->index += len; - /* - * End of input: stop here - */ - if (exec->inputString[exec->index] == 0) { - exec->index -= len; - break; - } - if (exec->transcount >= atom->min) { - int transno = exec->transno; - xmlRegStatePtr state = exec->state; - - /* - * The transition is acceptable save it - */ - exec->transno = -1; /* trick */ - exec->state = to; - xmlFARegExecSave(exec); - exec->transno = transno; - exec->state = state; - } - codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), - len); - ret = xmlRegCheckCharacter(atom, codepoint); - exec->transcount++; - } while (ret == 1); - if (exec->transcount < atom->min) - ret = 0; - - /* - * If the last check failed but one transition was found - * possible, rollback - */ - if (ret < 0) - ret = 0; - if (ret == 0) { - goto rollback; - } - } - } - if (ret == 1) { - if (exec->state->nbTrans > exec->transno + 1) { - xmlFARegExecSave(exec); - } - if (trans->counter >= 0) { -#ifdef DEBUG_REGEXP_EXEC - printf("Increasing count %d\n", trans->counter); -#endif - exec->counts[trans->counter]++; - } -#ifdef DEBUG_REGEXP_EXEC - printf("entering state %d\n", trans->to); -#endif - exec->state = comp->states[trans->to]; - exec->transno = 0; - if (trans->atom != NULL) { - exec->index += len; - } - goto progress; - } else if (ret < 0) { - exec->status = -4; - break; - } - } - if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { -rollback: - /* - * Failed to find a way out - */ - exec->determinist = 0; - xmlFARegExecRollBack(exec); - } -progress: - continue; - } - if (exec->rollbacks != NULL) { - if (exec->counts != NULL) { - int i; - - for (i = 0;i < exec->maxRollbacks;i++) - if (exec->rollbacks[i].counts != NULL) - xmlFree(exec->rollbacks[i].counts); - } - xmlFree(exec->rollbacks); - } - if (exec->counts != NULL) - xmlFree(exec->counts); - if (exec->status == 0) - return(1); - if (exec->status == -1) - return(0); - return(exec->status); -} - -/************************************************************************ - * * - * Progressive interface to the verifyer one atom at a time * - * * - ************************************************************************/ - -/** - * xmlRegNewExecCtxt: - * @comp: a precompiled regular expression - * @callback: a callback function used for handling progresses in the - * automata matching phase - * @data: the context data associated to the callback in this context - * - * Build a context used for progressive evaluation of a regexp. - * - * Returns the new context - */ -xmlRegExecCtxtPtr -xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) { - xmlRegExecCtxtPtr exec; - - if (comp == NULL) - return(NULL); - exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt)); - if (exec == NULL) { - return(NULL); - } - memset(exec, 0, sizeof(xmlRegExecCtxt)); - exec->inputString = NULL; - exec->index = 0; - exec->determinist = 1; - exec->maxRollbacks = 0; - exec->nbRollbacks = 0; - exec->rollbacks = NULL; - exec->status = 0; - exec->comp = comp; - if (comp->compact == NULL) - exec->state = comp->states[0]; - exec->transno = 0; - exec->transcount = 0; - exec->callback = callback; - exec->data = data; - if (comp->nbCounters > 0) { - exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)); - if (exec->counts == NULL) { - xmlFree(exec); - return(NULL); - } - memset(exec->counts, 0, comp->nbCounters * sizeof(int)); - } else - exec->counts = NULL; - exec->inputStackMax = 0; - exec->inputStackNr = 0; - exec->inputStack = NULL; - return(exec); -} - -/** - * xmlRegFreeExecCtxt: - * @exec: a regular expression evaulation context - * - * Free the structures associated to a regular expression evaulation context. - */ -void -xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) { - if (exec == NULL) - return; - - if (exec->rollbacks != NULL) { - if (exec->counts != NULL) { - int i; - - for (i = 0;i < exec->maxRollbacks;i++) - if (exec->rollbacks[i].counts != NULL) - xmlFree(exec->rollbacks[i].counts); - } - xmlFree(exec->rollbacks); - } - if (exec->counts != NULL) - xmlFree(exec->counts); - if (exec->inputStack != NULL) { - int i; - - for (i = 0;i < exec->inputStackNr;i++) { - if (exec->inputStack[i].value != NULL) - xmlFree(exec->inputStack[i].value); - } - xmlFree(exec->inputStack); - } - xmlFree(exec); -} - -static void -xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value, - void *data) { -#ifdef DEBUG_PUSH - printf("saving value: %d:%s\n", exec->inputStackNr, value); -#endif - if (exec->inputStackMax == 0) { - exec->inputStackMax = 4; - exec->inputStack = (xmlRegInputTokenPtr) - xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken)); - if (exec->inputStack == NULL) { - fprintf(stderr, "push input: allocation failed"); - exec->inputStackMax = 0; - return; - } - } else if (exec->inputStackNr + 1 >= exec->inputStackMax) { - xmlRegInputTokenPtr tmp; - - exec->inputStackMax *= 2; - tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack, - exec->inputStackMax * sizeof(xmlRegInputToken)); - if (tmp == NULL) { - fprintf(stderr, "push input: allocation failed"); - exec->inputStackMax /= 2; - return; - } - exec->inputStack = tmp; - } - exec->inputStack[exec->inputStackNr].value = xmlStrdup(value); - exec->inputStack[exec->inputStackNr].data = data; - exec->inputStackNr++; - exec->inputStack[exec->inputStackNr].value = NULL; - exec->inputStack[exec->inputStackNr].data = NULL; -} - - -/** - * xmlRegCompactPushString: - * @exec: a regexp execution context - * @comp: the precompiled exec with a compact table - * @value: a string token input - * @data: data associated to the token to reuse in callbacks - * - * Push one input token in the execution context - * - * Returns: 1 if the regexp reached a final state, 0 if non-final, and - * a negative value in case of error. - */ -static int -xmlRegCompactPushString(xmlRegExecCtxtPtr exec, - xmlRegexpPtr comp, - const xmlChar *value, - void *data) { - int state = exec->index; - int i, target; - - if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL)) - return(-1); - - if (value == NULL) { - /* - * are we at a final state ? - */ - if (comp->compact[state * (comp->nbstrings + 1)] == - XML_REGEXP_FINAL_STATE) - return(1); - return(0); - } - -#ifdef DEBUG_PUSH - printf("value pushed: %s\n", value); -#endif - - /* - * Examine all outside transition from current state - */ - for (i = 0;i < comp->nbstrings;i++) { - target = comp->compact[state * (comp->nbstrings + 1) + i + 1]; - if ((target > 0) && (target <= comp->nbstates)) { - target--; /* to avoid 0 */ - if (xmlStrEqual(comp->stringMap[i], value)) { - exec->index = target; - if ((exec->callback != NULL) && (comp->transdata != NULL)) { - exec->callback(exec->data, value, - comp->transdata[state * comp->nbstrings + i], data); - } -#ifdef DEBUG_PUSH - printf("entering state %d\n", target); -#endif - if (comp->compact[target * (comp->nbstrings + 1)] == - XML_REGEXP_FINAL_STATE) - return(1); - return(0); - } - } - } - /* - * Failed to find an exit transition out from current state for the - * current token - */ -#ifdef DEBUG_PUSH - printf("failed to find a transition for %s on state %d\n", value, state); -#endif - exec->status = -1; - return(-1); -} - -/** - * xmlRegExecPushString: - * @exec: a regexp execution context - * @value: a string token input - * @data: data associated to the token to reuse in callbacks - * - * Push one input token in the execution context - * - * Returns: 1 if the regexp reached a final state, 0 if non-final, and - * a negative value in case of error. - */ -int -xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value, - void *data) { - xmlRegTransPtr trans; - xmlRegAtomPtr atom; - int ret; - int final = 0; - - if (exec == NULL) - return(-1); - if (exec->comp == NULL) - return(-1); - if (exec->status != 0) - return(exec->status); - - if (exec->comp->compact != NULL) - return(xmlRegCompactPushString(exec, exec->comp, value, data)); - - if (value == NULL) { - if (exec->state->type == XML_REGEXP_FINAL_STATE) - return(1); - final = 1; - } - -#ifdef DEBUG_PUSH - printf("value pushed: %s\n", value); -#endif - /* - * If we have an active rollback stack push the new value there - * and get back to where we were left - */ - if ((value != NULL) && (exec->inputStackNr > 0)) { - xmlFARegExecSaveInputString(exec, value, data); - value = exec->inputStack[exec->index].value; - data = exec->inputStack[exec->index].data; -#ifdef DEBUG_PUSH - printf("value loaded: %s\n", value); -#endif - } - - while ((exec->status == 0) && - ((value != NULL) || - ((final == 1) && - (exec->state->type != XML_REGEXP_FINAL_STATE)))) { - - /* - * End of input on non-terminal state, rollback, however we may - * still have epsilon like transition for counted transitions - * on counters, in that case don't break too early. - */ - if ((value == NULL) && (exec->counts == NULL)) - goto rollback; - - exec->transcount = 0; - for (;exec->transno < exec->state->nbTrans;exec->transno++) { - trans = &exec->state->trans[exec->transno]; - if (trans->to < 0) - continue; - atom = trans->atom; - ret = 0; - if (trans->count == REGEXP_ALL_LAX_COUNTER) { - int i; - int count; - xmlRegTransPtr t; - xmlRegCounterPtr counter; - - ret = 0; - -#ifdef DEBUG_PUSH - printf("testing all lax %d\n", trans->count); -#endif - /* - * Check all counted transitions from the current state - */ - if ((value == NULL) && (final)) { - ret = 1; - } else if (value != NULL) { - for (i = 0;i < exec->state->nbTrans;i++) { - t = &exec->state->trans[i]; - if ((t->counter < 0) || (t == trans)) - continue; - counter = &exec->comp->counters[t->counter]; - count = exec->counts[t->counter]; - if ((count < counter->max) && - (t->atom != NULL) && - (xmlStrEqual(value, t->atom->valuep))) { - ret = 0; - break; - } - if ((count >= counter->min) && - (count < counter->max) && - (xmlStrEqual(value, t->atom->valuep))) { - ret = 1; - break; - } - } - } - } else if (trans->count == REGEXP_ALL_COUNTER) { - int i; - int count; - xmlRegTransPtr t; - xmlRegCounterPtr counter; - - ret = 1; - -#ifdef DEBUG_PUSH - printf("testing all %d\n", trans->count); -#endif - /* - * Check all counted transitions from the current state - */ - for (i = 0;i < exec->state->nbTrans;i++) { - t = &exec->state->trans[i]; - if ((t->counter < 0) || (t == trans)) - continue; - counter = &exec->comp->counters[t->counter]; - count = exec->counts[t->counter]; - if ((count < counter->min) || (count > counter->max)) { - ret = 0; - break; - } - } - } else if (trans->count >= 0) { - int count; - xmlRegCounterPtr counter; - - /* - * A counted transition. - */ - - count = exec->counts[trans->count]; - counter = &exec->comp->counters[trans->count]; -#ifdef DEBUG_PUSH - printf("testing count %d: val %d, min %d, max %d\n", - trans->count, count, counter->min, counter->max); -#endif - ret = ((count >= counter->min) && (count <= counter->max)); - } else if (atom == NULL) { - fprintf(stderr, "epsilon transition left at runtime\n"); - exec->status = -2; - break; - } else if (value != NULL) { - ret = xmlStrEqual(value, atom->valuep); - if ((ret == 1) && (trans->counter >= 0)) { - xmlRegCounterPtr counter; - int count; - - count = exec->counts[trans->counter]; - counter = &exec->comp->counters[trans->counter]; - if (count >= counter->max) - ret = 0; - } - - if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) { - xmlRegStatePtr to = exec->comp->states[trans->to]; - - /* - * this is a multiple input sequence - */ - if (exec->state->nbTrans > exec->transno + 1) { - if (exec->inputStackNr <= 0) { - xmlFARegExecSaveInputString(exec, value, data); - } - xmlFARegExecSave(exec); - } - exec->transcount = 1; - do { - /* - * Try to progress as much as possible on the input - */ - if (exec->transcount == atom->max) { - break; - } - exec->index++; - value = exec->inputStack[exec->index].value; - data = exec->inputStack[exec->index].data; -#ifdef DEBUG_PUSH - printf("value loaded: %s\n", value); -#endif - - /* - * End of input: stop here - */ - if (value == NULL) { - exec->index --; - break; - } - if (exec->transcount >= atom->min) { - int transno = exec->transno; - xmlRegStatePtr state = exec->state; - - /* - * The transition is acceptable save it - */ - exec->transno = -1; /* trick */ - exec->state = to; - if (exec->inputStackNr <= 0) { - xmlFARegExecSaveInputString(exec, value, data); - } - xmlFARegExecSave(exec); - exec->transno = transno; - exec->state = state; - } - ret = xmlStrEqual(value, atom->valuep); - exec->transcount++; - } while (ret == 1); - if (exec->transcount < atom->min) - ret = 0; - - /* - * If the last check failed but one transition was found - * possible, rollback - */ - if (ret < 0) - ret = 0; - if (ret == 0) { - goto rollback; - } - } - } - if (ret == 1) { - if ((exec->callback != NULL) && (atom != NULL)) { - exec->callback(exec->data, atom->valuep, - atom->data, data); - } - if (exec->state->nbTrans > exec->transno + 1) { - if (exec->inputStackNr <= 0) { - xmlFARegExecSaveInputString(exec, value, data); - } - xmlFARegExecSave(exec); - } - if (trans->counter >= 0) { -#ifdef DEBUG_PUSH - printf("Increasing count %d\n", trans->counter); -#endif - exec->counts[trans->counter]++; - } -#ifdef DEBUG_PUSH - printf("entering state %d\n", trans->to); -#endif - exec->state = exec->comp->states[trans->to]; - exec->transno = 0; - if (trans->atom != NULL) { - if (exec->inputStack != NULL) { - exec->index++; - if (exec->index < exec->inputStackNr) { - value = exec->inputStack[exec->index].value; - data = exec->inputStack[exec->index].data; -#ifdef DEBUG_PUSH - printf("value loaded: %s\n", value); -#endif - } else { - value = NULL; - data = NULL; -#ifdef DEBUG_PUSH - printf("end of input\n"); -#endif - } - } else { - value = NULL; - data = NULL; -#ifdef DEBUG_PUSH - printf("end of input\n"); -#endif - } - } - goto progress; - } else if (ret < 0) { - exec->status = -4; - break; - } - } - if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { -rollback: - /* - * Failed to find a way out - */ - exec->determinist = 0; - xmlFARegExecRollBack(exec); - if (exec->status == 0) { - value = exec->inputStack[exec->index].value; - data = exec->inputStack[exec->index].data; -#ifdef DEBUG_PUSH - printf("value loaded: %s\n", value); -#endif - } - } -progress: - continue; - } - if (exec->status == 0) { - return(exec->state->type == XML_REGEXP_FINAL_STATE); - } - return(exec->status); -} - -#if 0 -static int -xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) { - xmlRegTransPtr trans; - xmlRegAtomPtr atom; - int ret; - int codepoint, len; - - if (exec == NULL) - return(-1); - if (exec->status != 0) - return(exec->status); - - while ((exec->status == 0) && - ((exec->inputString[exec->index] != 0) || - (exec->state->type != XML_REGEXP_FINAL_STATE))) { - - /* - * End of input on non-terminal state, rollback, however we may - * still have epsilon like transition for counted transitions - * on counters, in that case don't break too early. - */ - if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) - goto rollback; - - exec->transcount = 0; - for (;exec->transno < exec->state->nbTrans;exec->transno++) { - trans = &exec->state->trans[exec->transno]; - if (trans->to < 0) - continue; - atom = trans->atom; - ret = 0; - if (trans->count >= 0) { - int count; - xmlRegCounterPtr counter; - - /* - * A counted transition. - */ - - count = exec->counts[trans->count]; - counter = &exec->comp->counters[trans->count]; -#ifdef DEBUG_REGEXP_EXEC - printf("testing count %d: val %d, min %d, max %d\n", - trans->count, count, counter->min, counter->max); -#endif - ret = ((count >= counter->min) && (count <= counter->max)); - } else if (atom == NULL) { - fprintf(stderr, "epsilon transition left at runtime\n"); - exec->status = -2; - break; - } else if (exec->inputString[exec->index] != 0) { - codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len); - ret = xmlRegCheckCharacter(atom, codepoint); - if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) { - xmlRegStatePtr to = exec->comp->states[trans->to]; - - /* - * this is a multiple input sequence - */ - if (exec->state->nbTrans > exec->transno + 1) { - xmlFARegExecSave(exec); - } - exec->transcount = 1; - do { - /* - * Try to progress as much as possible on the input - */ - if (exec->transcount == atom->max) { - break; - } - exec->index += len; - /* - * End of input: stop here - */ - if (exec->inputString[exec->index] == 0) { - exec->index -= len; - break; - } - if (exec->transcount >= atom->min) { - int transno = exec->transno; - xmlRegStatePtr state = exec->state; - - /* - * The transition is acceptable save it - */ - exec->transno = -1; /* trick */ - exec->state = to; - xmlFARegExecSave(exec); - exec->transno = transno; - exec->state = state; - } - codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), - len); - ret = xmlRegCheckCharacter(atom, codepoint); - exec->transcount++; - } while (ret == 1); - if (exec->transcount < atom->min) - ret = 0; - - /* - * If the last check failed but one transition was found - * possible, rollback - */ - if (ret < 0) - ret = 0; - if (ret == 0) { - goto rollback; - } - } - } - if (ret == 1) { - if (exec->state->nbTrans > exec->transno + 1) { - xmlFARegExecSave(exec); - } - if (trans->counter >= 0) { -#ifdef DEBUG_REGEXP_EXEC - printf("Increasing count %d\n", trans->counter); -#endif - exec->counts[trans->counter]++; - } -#ifdef DEBUG_REGEXP_EXEC - printf("entering state %d\n", trans->to); -#endif - exec->state = exec->comp->states[trans->to]; - exec->transno = 0; - if (trans->atom != NULL) { - exec->index += len; - } - goto progress; - } else if (ret < 0) { - exec->status = -4; - break; - } - } - if ((exec->transno != 0) || (exec->state->nbTrans == 0)) { -rollback: - /* - * Failed to find a way out - */ - exec->determinist = 0; - xmlFARegExecRollBack(exec); - } -progress: - continue; - } -} -#endif -/************************************************************************ - * * - * Parser for the Shemas Datatype Regular Expressions * - * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs * - * * - ************************************************************************/ - -/** - * xmlFAIsChar: - * @ctxt: a regexp parser context - * - * [10] Char ::= [^.\?*+()|#x5B#x5D] - */ -static int -xmlFAIsChar(xmlRegParserCtxtPtr ctxt) { - int cur; - int len; - - cur = CUR_SCHAR(ctxt->cur, len); - if ((cur == '.') || (cur == '\\') || (cur == '?') || - (cur == '*') || (cur == '+') || (cur == '(') || - (cur == ')') || (cur == '|') || (cur == 0x5B) || - (cur == 0x5D) || (cur == 0)) - return(-1); - return(cur); -} - -/** - * xmlFAParseCharProp: - * @ctxt: a regexp parser context - * - * [27] charProp ::= IsCategory | IsBlock - * [28] IsCategory ::= Letters | Marks | Numbers | Punctuation | - * Separators | Symbols | Others - * [29] Letters ::= 'L' [ultmo]? - * [30] Marks ::= 'M' [nce]? - * [31] Numbers ::= 'N' [dlo]? - * [32] Punctuation ::= 'P' [cdseifo]? - * [33] Separators ::= 'Z' [slp]? - * [34] Symbols ::= 'S' [mcko]? - * [35] Others ::= 'C' [cfon]? - * [36] IsBlock ::= 'Is' [a-zA-Z0-9#x2D]+ - */ -static void -xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) { - int cur; - xmlRegAtomType type = 0; - xmlChar *blockName = NULL; - - cur = CUR; - if (cur == 'L') { - NEXT; - cur = CUR; - if (cur == 'u') { - NEXT; - type = XML_REGEXP_LETTER_UPPERCASE; - } else if (cur == 'l') { - NEXT; - type = XML_REGEXP_LETTER_LOWERCASE; - } else if (cur == 't') { - NEXT; - type = XML_REGEXP_LETTER_TITLECASE; - } else if (cur == 'm') { - NEXT; - type = XML_REGEXP_LETTER_MODIFIER; - } else if (cur == 'o') { - NEXT; - type = XML_REGEXP_LETTER_OTHERS; - } else { - type = XML_REGEXP_LETTER; - } - } else if (cur == 'M') { - NEXT; - cur = CUR; - if (cur == 'n') { - NEXT; - /* nonspacing */ - type = XML_REGEXP_MARK_NONSPACING; - } else if (cur == 'c') { - NEXT; - /* spacing combining */ - type = XML_REGEXP_MARK_SPACECOMBINING; - } else if (cur == 'e') { - NEXT; - /* enclosing */ - type = XML_REGEXP_MARK_ENCLOSING; - } else { - /* all marks */ - type = XML_REGEXP_MARK; - } - } else if (cur == 'N') { - NEXT; - cur = CUR; - if (cur == 'd') { - NEXT; - /* digital */ - type = XML_REGEXP_NUMBER_DECIMAL; - } else if (cur == 'l') { - NEXT; - /* letter */ - type = XML_REGEXP_NUMBER_LETTER; - } else if (cur == 'o') { - NEXT; - /* other */ - type = XML_REGEXP_NUMBER_OTHERS; - } else { - /* all numbers */ - type = XML_REGEXP_NUMBER; - } - } else if (cur == 'P') { - NEXT; - cur = CUR; - if (cur == 'c') { - NEXT; - /* connector */ - type = XML_REGEXP_PUNCT_CONNECTOR; - } else if (cur == 'd') { - NEXT; - /* dash */ - type = XML_REGEXP_PUNCT_DASH; - } else if (cur == 's') { - NEXT; - /* open */ - type = XML_REGEXP_PUNCT_OPEN; - } else if (cur == 'e') { - NEXT; - /* close */ - type = XML_REGEXP_PUNCT_CLOSE; - } else if (cur == 'i') { - NEXT; - /* initial quote */ - type = XML_REGEXP_PUNCT_INITQUOTE; - } else if (cur == 'f') { - NEXT; - /* final quote */ - type = XML_REGEXP_PUNCT_FINQUOTE; - } else if (cur == 'o') { - NEXT; - /* other */ - type = XML_REGEXP_PUNCT_OTHERS; - } else { - /* all punctuation */ - type = XML_REGEXP_PUNCT; - } - } else if (cur == 'Z') { - NEXT; - cur = CUR; - if (cur == 's') { - NEXT; - /* space */ - type = XML_REGEXP_SEPAR_SPACE; - } else if (cur == 'l') { - NEXT; - /* line */ - type = XML_REGEXP_SEPAR_LINE; - } else if (cur == 'p') { - NEXT; - /* paragraph */ - type = XML_REGEXP_SEPAR_PARA; - } else { - /* all separators */ - type = XML_REGEXP_SEPAR; - } - } else if (cur == 'S') { - NEXT; - cur = CUR; - if (cur == 'm') { - NEXT; - type = XML_REGEXP_SYMBOL_MATH; - /* math */ - } else if (cur == 'c') { - NEXT; - type = XML_REGEXP_SYMBOL_CURRENCY; - /* currency */ - } else if (cur == 'k') { - NEXT; - type = XML_REGEXP_SYMBOL_MODIFIER; - /* modifiers */ - } else if (cur == 'o') { - NEXT; - type = XML_REGEXP_SYMBOL_OTHERS; - /* other */ - } else { - /* all symbols */ - type = XML_REGEXP_SYMBOL; - } - } else if (cur == 'C') { - NEXT; - cur = CUR; - if (cur == 'c') { - NEXT; - /* control */ - type = XML_REGEXP_OTHER_CONTROL; - } else if (cur == 'f') { - NEXT; - /* format */ - type = XML_REGEXP_OTHER_FORMAT; - } else if (cur == 'o') { - NEXT; - /* private use */ - type = XML_REGEXP_OTHER_PRIVATE; - } else if (cur == 'n') { - NEXT; - /* not assigned */ - type = XML_REGEXP_OTHER_NA; - } else { - /* all others */ - type = XML_REGEXP_OTHER; - } - } else if (cur == 'I') { - const xmlChar *start; - NEXT; - cur = CUR; - if (cur != 's') { - ERROR("IsXXXX expected"); - return; - } - NEXT; - start = ctxt->cur; - cur = CUR; - if (((cur >= 'a') && (cur <= 'z')) || - ((cur >= 'A') && (cur <= 'Z')) || - ((cur >= '0') && (cur <= '9')) || - (cur == 0x2D)) { - NEXT; - cur = CUR; - while (((cur >= 'a') && (cur <= 'z')) || - ((cur >= 'A') && (cur <= 'Z')) || - ((cur >= '0') && (cur <= '9')) || - (cur == 0x2D)) { - NEXT; - cur = CUR; - } - } - type = XML_REGEXP_BLOCK_NAME; - blockName = xmlStrndup(start, ctxt->cur - start); - } else { - ERROR("Unknown char property"); - return; - } - if (ctxt->atom == NULL) { - ctxt->atom = xmlRegNewAtom(ctxt, type); - if (ctxt->atom != NULL) - ctxt->atom->valuep = blockName; - } else if (ctxt->atom->type == XML_REGEXP_RANGES) { - xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, - type, 0, 0, blockName); - } -} - -/** - * xmlFAParseCharClassEsc: - * @ctxt: a regexp parser context - * - * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc ) - * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E] - * [25] catEsc ::= '\p{' charProp '}' - * [26] complEsc ::= '\P{' charProp '}' - * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW]) - */ -static void -xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) { - int cur; - - if (CUR == '.') { - if (ctxt->atom == NULL) { - ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR); - } else if (ctxt->atom->type == XML_REGEXP_RANGES) { - xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, - XML_REGEXP_ANYCHAR, 0, 0, NULL); - } - NEXT; - return; - } - if (CUR != '\\') { - ERROR("Escaped sequence: expecting \\"); - return; - } - NEXT; - cur = CUR; - if (cur == 'p') { - NEXT; - if (CUR != '{') { - ERROR("Expecting '{'"); - return; - } - NEXT; - xmlFAParseCharProp(ctxt); - if (CUR != '}') { - ERROR("Expecting '}'"); - return; - } - NEXT; - } else if (cur == 'P') { - NEXT; - if (CUR != '{') { - ERROR("Expecting '{'"); - return; - } - NEXT; - xmlFAParseCharProp(ctxt); - ctxt->atom->neg = 1; - if (CUR != '}') { - ERROR("Expecting '}'"); - return; - } - NEXT; - } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') || - (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') || - (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') || - (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) || - (cur == 0x5E)) { - if (ctxt->atom == NULL) { - ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL); - if (ctxt->atom != NULL) - ctxt->atom->codepoint = cur; - } else if (ctxt->atom->type == XML_REGEXP_RANGES) { - xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, - XML_REGEXP_CHARVAL, cur, cur, NULL); - } - NEXT; - } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') || - (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') || - (cur == 'w') || (cur == 'W')) { - xmlRegAtomType type = XML_REGEXP_ANYSPACE; - - switch (cur) { - case 's': - type = XML_REGEXP_ANYSPACE; - break; - case 'S': - type = XML_REGEXP_NOTSPACE; - break; - case 'i': - type = XML_REGEXP_INITNAME; - break; - case 'I': - type = XML_REGEXP_NOTINITNAME; - break; - case 'c': - type = XML_REGEXP_NAMECHAR; - break; - case 'C': - type = XML_REGEXP_NOTNAMECHAR; - break; - case 'd': - type = XML_REGEXP_DECIMAL; - break; - case 'D': - type = XML_REGEXP_NOTDECIMAL; - break; - case 'w': - type = XML_REGEXP_REALCHAR; - break; - case 'W': - type = XML_REGEXP_NOTREALCHAR; - break; - } - NEXT; - if (ctxt->atom == NULL) { - ctxt->atom = xmlRegNewAtom(ctxt, type); - } else if (ctxt->atom->type == XML_REGEXP_RANGES) { - xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, - type, 0, 0, NULL); - } - } -} - -/** - * xmlFAParseCharRef: - * @ctxt: a regexp parser context - * - * [19] XmlCharRef ::= ( '&#' [0-9]+ ';' ) | (' &#x' [0-9a-fA-F]+ ';' ) - */ -static int -xmlFAParseCharRef(xmlRegParserCtxtPtr ctxt) { - int ret = 0, cur; - - if ((CUR != '&') || (NXT(1) != '#')) - return(-1); - NEXT; - NEXT; - cur = CUR; - if (cur == 'x') { - NEXT; - cur = CUR; - if (((cur >= '0') && (cur <= '9')) || - ((cur >= 'a') && (cur <= 'f')) || - ((cur >= 'A') && (cur <= 'F'))) { - while (((cur >= '0') && (cur <= '9')) || - ((cur >= 'A') && (cur <= 'F'))) { - if ((cur >= '0') && (cur <= '9')) - ret = ret * 16 + cur - '0'; - else if ((cur >= 'a') && (cur <= 'f')) - ret = ret * 16 + 10 + (cur - 'a'); - else - ret = ret * 16 + 10 + (cur - 'A'); - NEXT; - cur = CUR; - } - } else { - ERROR("Char ref: expecting [0-9A-F]"); - return(-1); - } - } else { - if ((cur >= '0') && (cur <= '9')) { - while ((cur >= '0') && (cur <= '9')) { - ret = ret * 10 + cur - '0'; - NEXT; - cur = CUR; - } - } else { - ERROR("Char ref: expecting [0-9]"); - return(-1); - } - } - if (cur != ';') { - ERROR("Char ref: expecting ';'"); - return(-1); - } else { - NEXT; - } - return(ret); -} - -/** - * xmlFAParseCharRange: - * @ctxt: a regexp parser context - * - * [17] charRange ::= seRange | XmlCharRef | XmlCharIncDash - * [18] seRange ::= charOrEsc '-' charOrEsc - * [20] charOrEsc ::= XmlChar | SingleCharEsc - * [21] XmlChar ::= [^\#x2D#x5B#x5D] - * [22] XmlCharIncDash ::= [^\#x5B#x5D] - */ -static void -xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) { - int cur; - int start = -1; - int end = -1; - - if ((CUR == '&') && (NXT(1) == '#')) { - end = start = xmlFAParseCharRef(ctxt); - xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, - XML_REGEXP_CHARVAL, start, end, NULL); - return; - } - cur = CUR; - if (cur == '\\') { - NEXT; - cur = CUR; - switch (cur) { - case 'n': start = 0xA; break; - case 'r': start = 0xD; break; - case 't': start = 0x9; break; - case '\\': case '|': case '.': case '-': case '^': case '?': - case '*': case '+': case '{': case '}': case '(': case ')': - case '[': case ']': - start = cur; break; - default: - ERROR("Invalid escape value"); - return; - } - end = start; - } else if ((cur != 0x5B) && (cur != 0x5D)) { - end = start = cur; - } else { - ERROR("Expecting a char range"); - return; - } - NEXT; - if (start == '-') { - return; - } - cur = CUR; - if (cur != '-') { - xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, - XML_REGEXP_CHARVAL, start, end, NULL); - return; - } - NEXT; - cur = CUR; - if (cur == '\\') { - NEXT; - cur = CUR; - switch (cur) { - case 'n': end = 0xA; break; - case 'r': end = 0xD; break; - case 't': end = 0x9; break; - case '\\': case '|': case '.': case '-': case '^': case '?': - case '*': case '+': case '{': case '}': case '(': case ')': - case '[': case ']': - end = cur; break; - default: - ERROR("Invalid escape value"); - return; - } - } else if ((cur != 0x5B) && (cur != 0x5D)) { - end = cur; - } else { - ERROR("Expecting the end of a char range"); - return; - } - NEXT; - /* TODO check that the values are acceptable character ranges for XML */ - if (end < start) { - ERROR("End of range is before start of range"); - } else { - xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg, - XML_REGEXP_CHARVAL, start, end, NULL); - } - return; -} - -/** - * xmlFAParsePosCharGroup: - * @ctxt: a regexp parser context - * - * [14] posCharGroup ::= ( charRange | charClassEsc )+ - */ -static void -xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) { - do { - if ((CUR == '\\') || (CUR == '.')) { - xmlFAParseCharClassEsc(ctxt); - } else { - xmlFAParseCharRange(ctxt); - } - } while ((CUR != ']') && (CUR != '^') && (CUR != '-') && - (ctxt->error == 0)); -} - -/** - * xmlFAParseCharGroup: - * @ctxt: a regexp parser context - * - * [13] charGroup ::= posCharGroup | negCharGroup | charClassSub - * [15] negCharGroup ::= '^' posCharGroup - * [16] charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr - * [12] charClassExpr ::= '[' charGroup ']' - */ -static void -xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) { - int n = ctxt->neg; - while ((CUR != ']') && (ctxt->error == 0)) { - if (CUR == '^') { - int neg = ctxt->neg; - - NEXT; - ctxt->neg = !ctxt->neg; - xmlFAParsePosCharGroup(ctxt); - ctxt->neg = neg; - } else if (CUR == '-') { - NEXT; - ctxt->neg = !ctxt->neg; - if (CUR != '[') { - ERROR("charClassExpr: '[' expected"); - break; - } - NEXT; - xmlFAParseCharGroup(ctxt); - if (CUR == ']') { - NEXT; - } else { - ERROR("charClassExpr: ']' expected"); - break; - } - break; - } else if (CUR != ']') { - xmlFAParsePosCharGroup(ctxt); - } - } - ctxt->neg = n; -} - -/** - * xmlFAParseCharClass: - * @ctxt: a regexp parser context - * - * [11] charClass ::= charClassEsc | charClassExpr - * [12] charClassExpr ::= '[' charGroup ']' - */ -static void -xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) { - if (CUR == '[') { - NEXT; - ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES); - if (ctxt->atom == NULL) - return; - xmlFAParseCharGroup(ctxt); - if (CUR == ']') { - NEXT; - } else { - ERROR("xmlFAParseCharClass: ']' expected"); - } - } else { - xmlFAParseCharClassEsc(ctxt); - } -} - -/** - * xmlFAParseQuantExact: - * @ctxt: a regexp parser context - * - * [8] QuantExact ::= [0-9]+ - */ -static int -xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) { - int ret = 0; - int ok = 0; - - while ((CUR >= '0') && (CUR <= '9')) { - ret = ret * 10 + (CUR - '0'); - ok = 1; - NEXT; - } - if (ok != 1) { - return(-1); - } - return(ret); -} - -/** - * xmlFAParseQuantifier: - * @ctxt: a regexp parser context - * - * [4] quantifier ::= [?*+] | ( '{' quantity '}' ) - * [5] quantity ::= quantRange | quantMin | QuantExact - * [6] quantRange ::= QuantExact ',' QuantExact - * [7] quantMin ::= QuantExact ',' - * [8] QuantExact ::= [0-9]+ - */ -static int -xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) { - int cur; - - cur = CUR; - if ((cur == '?') || (cur == '*') || (cur == '+')) { - if (ctxt->atom != NULL) { - if (cur == '?') - ctxt->atom->quant = XML_REGEXP_QUANT_OPT; - else if (cur == '*') - ctxt->atom->quant = XML_REGEXP_QUANT_MULT; - else if (cur == '+') - ctxt->atom->quant = XML_REGEXP_QUANT_PLUS; - } - NEXT; - return(1); - } - if (cur == '{') { - int min = 0, max = 0; - - NEXT; - cur = xmlFAParseQuantExact(ctxt); - if (cur >= 0) - min = cur; - if (CUR == ',') { - NEXT; - cur = xmlFAParseQuantExact(ctxt); - if (cur >= 0) - max = cur; - } - if (CUR == '}') { - NEXT; - } else { - ERROR("Unterminated quantifier"); - } - if (max == 0) - max = min; - if (ctxt->atom != NULL) { - ctxt->atom->quant = XML_REGEXP_QUANT_RANGE; - ctxt->atom->min = min; - ctxt->atom->max = max; - } - return(1); - } - return(0); -} - -/** - * xmlFAParseAtom: - * @ctxt: a regexp parser context - * - * [9] atom ::= Char | charClass | ( '(' regExp ')' ) - */ -static int -xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) { - int codepoint, len; - - codepoint = xmlFAIsChar(ctxt); - if (codepoint > 0) { - ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL); - if (ctxt->atom == NULL) - return(-1); - codepoint = CUR_SCHAR(ctxt->cur, len); - ctxt->atom->codepoint = codepoint; - NEXTL(len); - return(1); - } else if (CUR == '|') { - return(0); - } else if (CUR == 0) { - return(0); - } else if (CUR == ')') { - return(0); - } else if (CUR == '(') { - xmlRegStatePtr start, oldend; - - NEXT; - xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL); - start = ctxt->state; - oldend = ctxt->end; - ctxt->end = NULL; - ctxt->atom = NULL; - xmlFAParseRegExp(ctxt, 0); - if (CUR == ')') { - NEXT; - } else { - ERROR("xmlFAParseAtom: expecting ')'"); - } - ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG); - if (ctxt->atom == NULL) - return(-1); - ctxt->atom->start = start; - ctxt->atom->stop = ctxt->state; - ctxt->end = oldend; - return(1); - } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) { - xmlFAParseCharClass(ctxt); - return(1); - } - return(0); -} - -/** - * xmlFAParsePiece: - * @ctxt: a regexp parser context - * - * [3] piece ::= atom quantifier? - */ -static int -xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) { - int ret; - - ctxt->atom = NULL; - ret = xmlFAParseAtom(ctxt); - if (ret == 0) - return(0); - if (ctxt->atom == NULL) { - ERROR("internal: no atom generated"); - } - xmlFAParseQuantifier(ctxt); - return(1); -} - -/** - * xmlFAParseBranch: - * @ctxt: a regexp parser context - * @first: is taht the first - * - * [2] branch ::= piece* - */ -static void -xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, int first) { - xmlRegStatePtr previous; - xmlRegAtomPtr prevatom = NULL; - int ret; - - previous = ctxt->state; - ret = xmlFAParsePiece(ctxt); - if (ret != 0) { - if (first) { - xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom); - previous = ctxt->state; - } else { - prevatom = ctxt->atom; - } - ctxt->atom = NULL; - } - while ((ret != 0) && (ctxt->error == 0)) { - ret = xmlFAParsePiece(ctxt); - if (ret != 0) { - if (first) { - xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom); - } else { - xmlFAGenerateTransitions(ctxt, previous, NULL, prevatom); - prevatom = ctxt->atom; - } - previous = ctxt->state; - ctxt->atom = NULL; - } - } - if (!first) { - xmlFAGenerateTransitions(ctxt, previous, ctxt->end, prevatom); - } -} - -/** - * xmlFAParseRegExp: - * @ctxt: a regexp parser context - * @top: is that the top-level expressions ? - * - * [1] regExp ::= branch ( '|' branch )* - */ -static void -xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) { - xmlRegStatePtr start, end, oldend; - - oldend = ctxt->end; - - start = ctxt->state; - xmlFAParseBranch(ctxt, (ctxt->end == NULL)); - if (CUR != '|') { - ctxt->end = ctxt->state; - return; - } - end = ctxt->state; - while ((CUR == '|') && (ctxt->error == 0)) { - NEXT; - ctxt->state = start; - ctxt->end = end; - xmlFAParseBranch(ctxt, 0); - } - if (!top) - ctxt->end = oldend; -} - -/************************************************************************ - * * - * The basic API * - * * - ************************************************************************/ - -/** - * xmlRegexpPrint: - * @output: the file for the output debug - * @regexp: the compiled regexp - * - * Print the content of the compiled regular expression - */ -void -xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) { - int i; - - fprintf(output, " regexp: "); - if (regexp == NULL) { - fprintf(output, "NULL\n"); - return; - } - fprintf(output, "'%s' ", regexp->string); - fprintf(output, "\n"); - fprintf(output, "%d atoms:\n", regexp->nbAtoms); - for (i = 0;i < regexp->nbAtoms; i++) { - fprintf(output, " %02d ", i); - xmlRegPrintAtom(output, regexp->atoms[i]); - } - fprintf(output, "%d states:", regexp->nbStates); - fprintf(output, "\n"); - for (i = 0;i < regexp->nbStates; i++) { - xmlRegPrintState(output, regexp->states[i]); - } - fprintf(output, "%d counters:\n", regexp->nbCounters); - for (i = 0;i < regexp->nbCounters; i++) { - fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min, - regexp->counters[i].max); - } -} - -/** - * xmlRegexpCompile: - * @regexp: a regular expression string - * - * Parses a regular expression conforming to XML Schemas Part 2 Datatype - * Appendix F and build an automata suitable for testing strings against - * that regular expression - * - * Returns the compiled expression or NULL in case of error - */ -xmlRegexpPtr -xmlRegexpCompile(const xmlChar *regexp) { - xmlRegexpPtr ret; - xmlRegParserCtxtPtr ctxt; - - ctxt = xmlRegNewParserCtxt(regexp); - if (ctxt == NULL) - return(NULL); - - /* initialize the parser */ - ctxt->end = NULL; - ctxt->start = ctxt->state = xmlRegNewState(ctxt); - xmlRegStatePush(ctxt, ctxt->start); - - /* parse the expression building an automata */ - xmlFAParseRegExp(ctxt, 1); - if (CUR != 0) { - ERROR("xmlFAParseRegExp: extra characters"); - } - ctxt->end = ctxt->state; - ctxt->start->type = XML_REGEXP_START_STATE; - ctxt->end->type = XML_REGEXP_FINAL_STATE; - - /* remove the Epsilon except for counted transitions */ - xmlFAEliminateEpsilonTransitions(ctxt); - - - if (ctxt->error != 0) { - xmlRegFreeParserCtxt(ctxt); - return(NULL); - } - ret = xmlRegEpxFromParse(ctxt); - xmlRegFreeParserCtxt(ctxt); - return(ret); -} - -/** - * xmlRegexpExec: - * @comp: the compiled regular expression - * @content: the value to check against the regular expression - * - * Check if the regular expression generate the value - * - * Returns 1 if it matches, 0 if not and a negativa value in case of error - */ -int -xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) { - if ((comp == NULL) || (content == NULL)) - return(-1); - return(xmlFARegExec(comp, content)); -} - -/** - * xmlRegexpIsDeterminist: - * @comp: the compiled regular expression - * - * Check if the regular expression is determinist - * - * Returns 1 if it yes, 0 if not and a negativa value in case of error - */ -int -xmlRegexpIsDeterminist(xmlRegexpPtr comp) { - xmlAutomataPtr am; - int ret; - - if (comp == NULL) - return(-1); - if (comp->determinist != -1) - return(comp->determinist); - - am = xmlNewAutomata(); - if (am->states != NULL) { - int i; - - for (i = 0;i < am->nbStates;i++) - xmlRegFreeState(am->states[i]); - xmlFree(am->states); - } - am->nbAtoms = comp->nbAtoms; - am->atoms = comp->atoms; - am->nbStates = comp->nbStates; - am->states = comp->states; - am->determinist = -1; - ret = xmlFAComputesDeterminism(am); - am->atoms = NULL; - am->states = NULL; - xmlFreeAutomata(am); - return(ret); -} - -/** - * xmlRegFreeRegexp: - * @regexp: the regexp - * - * Free a regexp - */ -void -xmlRegFreeRegexp(xmlRegexpPtr regexp) { - int i; - if (regexp == NULL) - return; - - if (regexp->string != NULL) - xmlFree(regexp->string); - if (regexp->states != NULL) { - for (i = 0;i < regexp->nbStates;i++) - xmlRegFreeState(regexp->states[i]); - xmlFree(regexp->states); - } - if (regexp->atoms != NULL) { - for (i = 0;i < regexp->nbAtoms;i++) - xmlRegFreeAtom(regexp->atoms[i]); - xmlFree(regexp->atoms); - } - if (regexp->counters != NULL) - xmlFree(regexp->counters); - if (regexp->compact != NULL) - xmlFree(regexp->compact); - if (regexp->transdata != NULL) - xmlFree(regexp->transdata); - if (regexp->stringMap != NULL) { - for (i = 0; i < regexp->nbstrings;i++) - xmlFree(regexp->stringMap[i]); - xmlFree(regexp->stringMap); - } - - xmlFree(regexp); -} - -#ifdef LIBXML_AUTOMATA_ENABLED -/************************************************************************ - * * - * The Automata interface * - * * - ************************************************************************/ - -/** - * xmlNewAutomata: - * - * Create a new automata - * - * Returns the new object or NULL in case of failure - */ -xmlAutomataPtr -xmlNewAutomata(void) { - xmlAutomataPtr ctxt; - - ctxt = xmlRegNewParserCtxt(NULL); - if (ctxt == NULL) - return(NULL); - - /* initialize the parser */ - ctxt->end = NULL; - ctxt->start = ctxt->state = xmlRegNewState(ctxt); - xmlRegStatePush(ctxt, ctxt->start); - - return(ctxt); -} - -/** - * xmlFreeAutomata: - * @am: an automata - * - * Free an automata - */ -void -xmlFreeAutomata(xmlAutomataPtr am) { - if (am == NULL) - return; - xmlRegFreeParserCtxt(am); -} - -/** - * xmlAutomataGetInitState: - * @am: an automata - * - * Initial state lookup - * - * Returns the initial state of the automata - */ -xmlAutomataStatePtr -xmlAutomataGetInitState(xmlAutomataPtr am) { - if (am == NULL) - return(NULL); - return(am->start); -} - -/** - * xmlAutomataSetFinalState: - * @am: an automata - * @state: a state in this automata - * - * Makes that state a final state - * - * Returns 0 or -1 in case of error - */ -int -xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) { - if ((am == NULL) || (state == NULL)) - return(-1); - state->type = XML_REGEXP_FINAL_STATE; - return(0); -} - -/** - * xmlAutomataNewTransition: - * @am: an automata - * @from: the starting point of the transition - * @to: the target point of the transition or NULL - * @token: the input string associated to that transition - * @data: data passed to the callback function if the transition is activated - * - * If @to is NULL, this create first a new target state in the automata - * and then adds a transition from the @from state to the target state - * activated by the value of @token - * - * Returns the target state or NULL in case of error - */ -xmlAutomataStatePtr -xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from, - xmlAutomataStatePtr to, const xmlChar *token, - void *data) { - xmlRegAtomPtr atom; - - if ((am == NULL) || (from == NULL) || (token == NULL)) - return(NULL); - atom = xmlRegNewAtom(am, XML_REGEXP_STRING); - atom->data = data; - if (atom == NULL) - return(NULL); - atom->valuep = xmlStrdup(token); - - xmlFAGenerateTransitions(am, from, to, atom); - if (to == NULL) - return(am->state); - return(to); -} - -/** - * xmlAutomataNewCountTrans: - * @am: an automata - * @from: the starting point of the transition - * @to: the target point of the transition or NULL - * @token: the input string associated to that transition - * @min: the minimum successive occurences of token - * @max: the maximum successive occurences of token - * @data: data associated to the transition - * - * If @to is NULL, this create first a new target state in the automata - * and then adds a transition from the @from state to the target state - * activated by a succession of input of value @token and whose number - * is between @min and @max - * - * Returns the target state or NULL in case of error - */ -xmlAutomataStatePtr -xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, - xmlAutomataStatePtr to, const xmlChar *token, - int min, int max, void *data) { - xmlRegAtomPtr atom; - - if ((am == NULL) || (from == NULL) || (token == NULL)) - return(NULL); - if (min < 0) - return(NULL); - if ((max < min) || (max < 1)) - return(NULL); - atom = xmlRegNewAtom(am, XML_REGEXP_STRING); - if (atom == NULL) - return(NULL); - atom->valuep = xmlStrdup(token); - atom->data = data; - if (min == 0) - atom->min = 1; - else - atom->min = min; - atom->max = max; - - xmlFAGenerateTransitions(am, from, to, atom); - if (to == NULL) - to = am->state; - if (to == NULL) - return(NULL); - if (min == 0) - xmlFAGenerateEpsilonTransition(am, from, to); - return(to); -} - -/** - * xmlAutomataNewOnceTrans: - * @am: an automata - * @from: the starting point of the transition - * @to: the target point of the transition or NULL - * @token: the input string associated to that transition - * @min: the minimum successive occurences of token - * @max: the maximum successive occurences of token - * @data: data associated to the transition - * - * If @to is NULL, this create first a new target state in the automata - * and then adds a transition from the @from state to the target state - * activated by a succession of input of value @token and whose number - * is between @min and @max, moreover that transistion can only be crossed - * once. - * - * Returns the target state or NULL in case of error - */ -xmlAutomataStatePtr -xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, - xmlAutomataStatePtr to, const xmlChar *token, - int min, int max, void *data) { - xmlRegAtomPtr atom; - int counter; - - if ((am == NULL) || (from == NULL) || (token == NULL)) - return(NULL); - if (min < 1) - return(NULL); - if ((max < min) || (max < 1)) - return(NULL); - atom = xmlRegNewAtom(am, XML_REGEXP_STRING); - if (atom == NULL) - return(NULL); - atom->valuep = xmlStrdup(token); - atom->data = data; - atom->quant = XML_REGEXP_QUANT_ONCEONLY; - if (min == 0) - atom->min = 1; - else - atom->min = min; - atom->max = max; - /* - * associate a counter to the transition. - */ - counter = xmlRegGetCounter(am); - am->counters[counter].min = 1; - am->counters[counter].max = 1; - - /* xmlFAGenerateTransitions(am, from, to, atom); */ - if (to == NULL) { - to = xmlRegNewState(am); - xmlRegStatePush(am, to); - } - xmlRegStateAddTrans(am, from, atom, to, counter, -1); - xmlRegAtomPush(am, atom); - am->state = to; - if (to == NULL) - to = am->state; - if (to == NULL) - return(NULL); - return(to); -} - -/** - * xmlAutomataNewState: - * @am: an automata - * - * Create a new disconnected state in the automata - * - * Returns the new state or NULL in case of error - */ -xmlAutomataStatePtr -xmlAutomataNewState(xmlAutomataPtr am) { - xmlAutomataStatePtr to; - - if (am == NULL) - return(NULL); - to = xmlRegNewState(am); - xmlRegStatePush(am, to); - return(to); -} - -/** - * xmlAutomataNewEpsilon: - * @am: an automata - * @from: the starting point of the transition - * @to: the target point of the transition or NULL - * - * If @to is NULL, this create first a new target state in the automata - * and then adds a an epsilon transition from the @from state to the - * target state - * - * Returns the target state or NULL in case of error - */ -xmlAutomataStatePtr -xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from, - xmlAutomataStatePtr to) { - if ((am == NULL) || (from == NULL)) - return(NULL); - xmlFAGenerateEpsilonTransition(am, from, to); - if (to == NULL) - return(am->state); - return(to); -} - -/** - * xmlAutomataNewAllTrans: - * @am: an automata - * @from: the starting point of the transition - * @to: the target point of the transition or NULL - * @lax: allow to transition if not all all transitions have been activated - * - * If @to is NULL, this create first a new target state in the automata - * and then adds a an ALL transition from the @from state to the - * target state. That transition is an epsilon transition allowed only when - * all transitions from the @from node have been activated. - * - * Returns the target state or NULL in case of error - */ -xmlAutomataStatePtr -xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, - xmlAutomataStatePtr to, int lax) { - if ((am == NULL) || (from == NULL)) - return(NULL); - xmlFAGenerateAllTransition(am, from, to, lax); - if (to == NULL) - return(am->state); - return(to); -} - -/** - * xmlAutomataNewCounter: - * @am: an automata - * @min: the minimal value on the counter - * @max: the maximal value on the counter - * - * Create a new counter - * - * Returns the counter number or -1 in case of error - */ -int -xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) { - int ret; - - if (am == NULL) - return(-1); - - ret = xmlRegGetCounter(am); - if (ret < 0) - return(-1); - am->counters[ret].min = min; - am->counters[ret].max = max; - return(ret); -} - -/** - * xmlAutomataNewCountedTrans: - * @am: an automata - * @from: the starting point of the transition - * @to: the target point of the transition or NULL - * @counter: the counter associated to that transition - * - * If @to is NULL, this create first a new target state in the automata - * and then adds an epsilon transition from the @from state to the target state - * which will increment the counter provided - * - * Returns the target state or NULL in case of error - */ -xmlAutomataStatePtr -xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, - xmlAutomataStatePtr to, int counter) { - if ((am == NULL) || (from == NULL) || (counter < 0)) - return(NULL); - xmlFAGenerateCountedEpsilonTransition(am, from, to, counter); - if (to == NULL) - return(am->state); - return(to); -} - -/** - * xmlAutomataNewCounterTrans: - * @am: an automata - * @from: the starting point of the transition - * @to: the target point of the transition or NULL - * @counter: the counter associated to that transition - * - * If @to is NULL, this create first a new target state in the automata - * and then adds an epsilon transition from the @from state to the target state - * which will be allowed only if the counter is within the right range. - * - * Returns the target state or NULL in case of error - */ -xmlAutomataStatePtr -xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from, - xmlAutomataStatePtr to, int counter) { - if ((am == NULL) || (from == NULL) || (counter < 0)) - return(NULL); - xmlFAGenerateCountedTransition(am, from, to, counter); - if (to == NULL) - return(am->state); - return(to); -} - -/** - * xmlAutomataCompile: - * @am: an automata - * - * Compile the automata into a Reg Exp ready for being executed. - * The automata should be free after this point. - * - * Returns the compiled regexp or NULL in case of error - */ -xmlRegexpPtr -xmlAutomataCompile(xmlAutomataPtr am) { - xmlRegexpPtr ret; - - xmlFAEliminateEpsilonTransitions(am); - /* xmlFAComputesDeterminism(am); */ - ret = xmlRegEpxFromParse(am); - - return(ret); -} - -/** - * xmlAutomataIsDeterminist: - * @am: an automata - * - * Checks if an automata is determinist. - * - * Returns 1 if true, 0 if not, and -1 in case of error - */ -int -xmlAutomataIsDeterminist(xmlAutomataPtr am) { - int ret; - - if (am == NULL) - return(-1); - - ret = xmlFAComputesDeterminism(am); - return(ret); -} -#endif /* LIBXML_AUTOMATA_ENABLED */ -#endif /* LIBXML_REGEXP_ENABLED */ |