summaryrefslogtreecommitdiff
path: root/bundle/libxml/xmlregexp.c
diff options
context:
space:
mode:
Diffstat (limited to 'bundle/libxml/xmlregexp.c')
-rw-r--r--bundle/libxml/xmlregexp.c4319
1 files changed, 0 insertions, 4319 deletions
diff --git a/bundle/libxml/xmlregexp.c b/bundle/libxml/xmlregexp.c
deleted file mode 100644
index 129c1269ec..0000000000
--- a/bundle/libxml/xmlregexp.c
+++ /dev/null
@@ -1,4319 +0,0 @@
-/*
- * regexp.c: generic and extensible Regular Expression engine
- *
- * Basically designed with the purpose of compiling regexps for
- * the variety of validation/shemas mechanisms now available in
- * XML related specifications thise includes:
- * - XML-1.0 DTD validation
- * - XML Schemas structure part 1
- * - XML Schemas Datatypes part 2 especially Appendix F
- * - RELAX-NG/TREX i.e. the counter proposal
- *
- * See Copyright for the status of this software.
- *
- * Daniel Veillard <veillard@redhat.com>
- */
-
-#define IN_LIBXML
-#include "libxml.h"
-
-#ifdef LIBXML_REGEXP_ENABLED
-
-#include <stdio.h>
-#include <string.h>
-#include <libxml/tree.h>
-#include <libxml/parserInternals.h>
-#include <libxml/xmlregexp.h>
-#include <libxml/xmlautomata.h>
-#include <libxml/xmlunicode.h>
-
-/* #define DEBUG_REGEXP_GRAPH */
-/* #define DEBUG_REGEXP_EXEC */
-/* #define DEBUG_PUSH */
-/* #define DEBUG_COMPACTION */
-
-#define ERROR(str) ctxt->error = 1; \
- xmlGenericError(xmlGenericErrorContext, "Regexp: %s: %s\n", str, ctxt->cur)
-#define NEXT ctxt->cur++
-#define CUR (*(ctxt->cur))
-#define NXT(index) (ctxt->cur[index])
-
-#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
-#define NEXTL(l) ctxt->cur += l;
-
-/**
- * TODO:
- *
- * macro to flag unimplemented blocks
- */
-#define TODO \
- xmlGenericError(xmlGenericErrorContext, \
- "Unimplemented block at %s:%d\n", \
- __FILE__, __LINE__);
-
-
-/************************************************************************
- * *
- * Datatypes and structures *
- * *
- ************************************************************************/
-
-typedef enum {
- XML_REGEXP_EPSILON = 1,
- XML_REGEXP_CHARVAL,
- XML_REGEXP_RANGES,
- XML_REGEXP_SUBREG,
- XML_REGEXP_STRING,
- XML_REGEXP_ANYCHAR, /* . */
- XML_REGEXP_ANYSPACE, /* \s */
- XML_REGEXP_NOTSPACE, /* \S */
- XML_REGEXP_INITNAME, /* \l */
- XML_REGEXP_NOTINITNAME, /* \l */
- XML_REGEXP_NAMECHAR, /* \c */
- XML_REGEXP_NOTNAMECHAR, /* \C */
- XML_REGEXP_DECIMAL, /* \d */
- XML_REGEXP_NOTDECIMAL, /* \d */
- XML_REGEXP_REALCHAR, /* \w */
- XML_REGEXP_NOTREALCHAR, /* \w */
- XML_REGEXP_LETTER,
- XML_REGEXP_LETTER_UPPERCASE,
- XML_REGEXP_LETTER_LOWERCASE,
- XML_REGEXP_LETTER_TITLECASE,
- XML_REGEXP_LETTER_MODIFIER,
- XML_REGEXP_LETTER_OTHERS,
- XML_REGEXP_MARK,
- XML_REGEXP_MARK_NONSPACING,
- XML_REGEXP_MARK_SPACECOMBINING,
- XML_REGEXP_MARK_ENCLOSING,
- XML_REGEXP_NUMBER,
- XML_REGEXP_NUMBER_DECIMAL,
- XML_REGEXP_NUMBER_LETTER,
- XML_REGEXP_NUMBER_OTHERS,
- XML_REGEXP_PUNCT,
- XML_REGEXP_PUNCT_CONNECTOR,
- XML_REGEXP_PUNCT_DASH,
- XML_REGEXP_PUNCT_OPEN,
- XML_REGEXP_PUNCT_CLOSE,
- XML_REGEXP_PUNCT_INITQUOTE,
- XML_REGEXP_PUNCT_FINQUOTE,
- XML_REGEXP_PUNCT_OTHERS,
- XML_REGEXP_SEPAR,
- XML_REGEXP_SEPAR_SPACE,
- XML_REGEXP_SEPAR_LINE,
- XML_REGEXP_SEPAR_PARA,
- XML_REGEXP_SYMBOL,
- XML_REGEXP_SYMBOL_MATH,
- XML_REGEXP_SYMBOL_CURRENCY,
- XML_REGEXP_SYMBOL_MODIFIER,
- XML_REGEXP_SYMBOL_OTHERS,
- XML_REGEXP_OTHER,
- XML_REGEXP_OTHER_CONTROL,
- XML_REGEXP_OTHER_FORMAT,
- XML_REGEXP_OTHER_PRIVATE,
- XML_REGEXP_OTHER_NA,
- XML_REGEXP_BLOCK_NAME
-} xmlRegAtomType;
-
-typedef enum {
- XML_REGEXP_QUANT_EPSILON = 1,
- XML_REGEXP_QUANT_ONCE,
- XML_REGEXP_QUANT_OPT,
- XML_REGEXP_QUANT_MULT,
- XML_REGEXP_QUANT_PLUS,
- XML_REGEXP_QUANT_ONCEONLY,
- XML_REGEXP_QUANT_ALL,
- XML_REGEXP_QUANT_RANGE
-} xmlRegQuantType;
-
-typedef enum {
- XML_REGEXP_START_STATE = 1,
- XML_REGEXP_FINAL_STATE,
- XML_REGEXP_TRANS_STATE
-} xmlRegStateType;
-
-typedef enum {
- XML_REGEXP_MARK_NORMAL = 0,
- XML_REGEXP_MARK_START,
- XML_REGEXP_MARK_VISITED
-} xmlRegMarkedType;
-
-typedef struct _xmlRegRange xmlRegRange;
-typedef xmlRegRange *xmlRegRangePtr;
-
-struct _xmlRegRange {
- int neg;
- xmlRegAtomType type;
- int start;
- int end;
- xmlChar *blockName;
-};
-
-typedef struct _xmlRegAtom xmlRegAtom;
-typedef xmlRegAtom *xmlRegAtomPtr;
-
-typedef struct _xmlAutomataState xmlRegState;
-typedef xmlRegState *xmlRegStatePtr;
-
-struct _xmlRegAtom {
- int no;
- xmlRegAtomType type;
- xmlRegQuantType quant;
- int min;
- int max;
-
- void *valuep;
- void *valuep2;
- int neg;
- int codepoint;
- xmlRegStatePtr start;
- xmlRegStatePtr stop;
- int maxRanges;
- int nbRanges;
- xmlRegRangePtr *ranges;
- void *data;
-};
-
-typedef struct _xmlRegCounter xmlRegCounter;
-typedef xmlRegCounter *xmlRegCounterPtr;
-
-struct _xmlRegCounter {
- int min;
- int max;
-};
-
-typedef struct _xmlRegTrans xmlRegTrans;
-typedef xmlRegTrans *xmlRegTransPtr;
-
-struct _xmlRegTrans {
- xmlRegAtomPtr atom;
- int to;
- int counter;
- int count;
-};
-
-struct _xmlAutomataState {
- xmlRegStateType type;
- xmlRegMarkedType mark;
- xmlRegMarkedType reached;
- int no;
-
- int maxTrans;
- int nbTrans;
- xmlRegTrans *trans;
-};
-
-typedef struct _xmlAutomata xmlRegParserCtxt;
-typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
-
-struct _xmlAutomata {
- xmlChar *string;
- xmlChar *cur;
-
- int error;
- int neg;
-
- xmlRegStatePtr start;
- xmlRegStatePtr end;
- xmlRegStatePtr state;
-
- xmlRegAtomPtr atom;
-
- int maxAtoms;
- int nbAtoms;
- xmlRegAtomPtr *atoms;
-
- int maxStates;
- int nbStates;
- xmlRegStatePtr *states;
-
- int maxCounters;
- int nbCounters;
- xmlRegCounter *counters;
-
- int determinist;
-};
-
-struct _xmlRegexp {
- xmlChar *string;
- int nbStates;
- xmlRegStatePtr *states;
- int nbAtoms;
- xmlRegAtomPtr *atoms;
- int nbCounters;
- xmlRegCounter *counters;
- int determinist;
- /*
- * That's the compact form for determinists automatas
- */
- int nbstates;
- int *compact;
- void **transdata;
- int nbstrings;
- xmlChar **stringMap;
-};
-
-typedef struct _xmlRegExecRollback xmlRegExecRollback;
-typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
-
-struct _xmlRegExecRollback {
- xmlRegStatePtr state;/* the current state */
- int index; /* the index in the input stack */
- int nextbranch; /* the next transition to explore in that state */
- int *counts; /* save the automate state if it has some */
-};
-
-typedef struct _xmlRegInputToken xmlRegInputToken;
-typedef xmlRegInputToken *xmlRegInputTokenPtr;
-
-struct _xmlRegInputToken {
- xmlChar *value;
- void *data;
-};
-
-struct _xmlRegExecCtxt {
- int status; /* execution status != 0 indicate an error */
- int determinist; /* did we found an inderterministic behaviour */
- xmlRegexpPtr comp; /* the compiled regexp */
- xmlRegExecCallbacks callback;
- void *data;
-
- xmlRegStatePtr state;/* the current state */
- int transno; /* the current transition on that state */
- int transcount; /* the number of char in char counted transitions */
-
- /*
- * A stack of rollback states
- */
- int maxRollbacks;
- int nbRollbacks;
- xmlRegExecRollback *rollbacks;
-
- /*
- * The state of the automata if any
- */
- int *counts;
-
- /*
- * The input stack
- */
- int inputStackMax;
- int inputStackNr;
- int index;
- int *charStack;
- const xmlChar *inputString; /* when operating on characters */
- xmlRegInputTokenPtr inputStack;/* when operating on strings */
-
-};
-
-#define REGEXP_ALL_COUNTER 0x123456
-#define REGEXP_ALL_LAX_COUNTER 0x123457
-
-static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
-static void xmlRegFreeState(xmlRegStatePtr state);
-static void xmlRegFreeAtom(xmlRegAtomPtr atom);
-
-/************************************************************************
- * *
- * Allocation/Deallocation *
- * *
- ************************************************************************/
-
-static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
-/**
- * xmlRegEpxFromParse:
- * @ctxt: the parser context used to build it
- *
- * Allocate a new regexp and fill it with the reult from the parser
- *
- * Returns the new regexp or NULL in case of error
- */
-static xmlRegexpPtr
-xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
- xmlRegexpPtr ret;
-
- ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
- if (ret == NULL)
- return(NULL);
- memset(ret, 0, sizeof(xmlRegexp));
- ret->string = ctxt->string;
- ctxt->string = NULL;
- ret->nbStates = ctxt->nbStates;
- ctxt->nbStates = 0;
- ret->states = ctxt->states;
- ctxt->states = NULL;
- ret->nbAtoms = ctxt->nbAtoms;
- ctxt->nbAtoms = 0;
- ret->atoms = ctxt->atoms;
- ctxt->atoms = NULL;
- ret->nbCounters = ctxt->nbCounters;
- ctxt->nbCounters = 0;
- ret->counters = ctxt->counters;
- ctxt->counters = NULL;
- ret->determinist = ctxt->determinist;
-
- if ((ret->determinist != 0) &&
- (ret->nbCounters == 0) &&
- (ret->atoms != NULL) &&
- (ret->atoms[0] != NULL) &&
- (ret->atoms[0]->type == XML_REGEXP_STRING)) {
- int i, j, nbstates = 0, nbatoms = 0;
- int *stateRemap;
- int *stringRemap;
- int *transitions;
- void **transdata;
- xmlChar **stringMap;
- xmlChar *value;
-
- /*
- * Switch to a compact representation
- * 1/ counting the effective number of states left
- * 2/ conting the unique number of atoms, and check that
- * they are all of the string type
- * 3/ build a table state x atom for the transitions
- */
-
- stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
- for (i = 0;i < ret->nbStates;i++) {
- if (ret->states[i] != NULL) {
- stateRemap[i] = nbstates;
- nbstates++;
- } else {
- stateRemap[i] = -1;
- }
- }
-#ifdef DEBUG_COMPACTION
- printf("Final: %d states\n", nbstates);
-#endif
- stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
- stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
- for (i = 0;i < ret->nbAtoms;i++) {
- if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
- (ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
- value = ret->atoms[i]->valuep;
- for (j = 0;j < nbatoms;j++) {
- if (xmlStrEqual(stringMap[j], value)) {
- stringRemap[i] = j;
- break;
- }
- }
- if (j >= nbatoms) {
- stringRemap[i] = nbatoms;
- stringMap[nbatoms] = xmlStrdup(value);
- nbatoms++;
- }
- } else {
- xmlFree(stateRemap);
- xmlFree(stringRemap);
- for (i = 0;i < nbatoms;i++)
- xmlFree(stringMap[i]);
- xmlFree(stringMap);
- goto fail_compact;
- }
- }
-#ifdef DEBUG_COMPACTION
- printf("Final: %d atoms\n", nbatoms);
-#endif
-
- /*
- * Allocate the transition table. The first entry for each
- * state correspond to the state type.
- */
- transitions = (int *) xmlMalloc(nbstates * (nbatoms + 1) * sizeof(int));
- transdata = NULL;
- memset(transitions, 0, nbstates * (nbatoms + 1) * sizeof(int));
-
- for (i = 0;i < ret->nbStates;i++) {
- int stateno, atomno, targetno, prev;
- xmlRegStatePtr state;
- xmlRegTransPtr trans;
-
- stateno = stateRemap[i];
- if (stateno == -1)
- continue;
- state = ret->states[i];
-
- transitions[stateno * (nbatoms + 1)] = state->type;
-
- for (j = 0;j < state->nbTrans;j++) {
- trans = &(state->trans[j]);
- if ((trans->to == -1) || (trans->atom == NULL))
- continue;
- atomno = stringRemap[trans->atom->no];
- if ((trans->atom->data != NULL) && (transdata == NULL)) {
- transdata = (void **) xmlMalloc(nbstates * nbatoms *
- sizeof(void *));
- if (transdata != NULL)
- memset(transdata, 0,
- nbstates * nbatoms * sizeof(void *));
- }
- targetno = stateRemap[trans->to];
- /*
- * if the same atome can generate transition to 2 different
- * states then it means the automata is not determinist and
- * the compact form can't be used !
- */
- prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
- if (prev != 0) {
- if (prev != targetno + 1) {
- printf("not determinist\n");
- ret->determinist = 0;
-#ifdef DEBUG_COMPACTION
- printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
- i, j, trans->atom->no, trans->to, atomno, targetno);
- printf(" previous to is %d\n", prev);
-#endif
- ret->determinist = 0;
- if (transdata != NULL)
- xmlFree(transdata);
- xmlFree(transitions);
- xmlFree(stateRemap);
- xmlFree(stringRemap);
- for (i = 0;i < nbatoms;i++)
- xmlFree(stringMap[i]);
- xmlFree(stringMap);
- goto fail_compact;
- }
- } else {
-#if 0
- printf("State %d trans %d: atom %d to %d : %d to %d\n",
- i, j, trans->atom->no, trans->to, atomno, targetno);
-#endif
- transitions[stateno * (nbatoms + 1) + atomno + 1] =
- targetno + 1; /* to avoid 0 */
- if (transdata != NULL)
- transdata[stateno * nbatoms + atomno] =
- trans->atom->data;
- }
- }
- }
- ret->determinist = 1;
-#ifdef DEBUG_COMPACTION
- /*
- * Debug
- */
- for (i = 0;i < nbstates;i++) {
- for (j = 0;j < nbatoms + 1;j++) {
- printf("%02d ", transitions[i * (nbatoms + 1) + j]);
- }
- printf("\n");
- }
- printf("\n");
-#endif
- /*
- * Cleanup of the old data
- */
- if (ret->states != NULL) {
- for (i = 0;i < ret->nbStates;i++)
- xmlRegFreeState(ret->states[i]);
- xmlFree(ret->states);
- }
- ret->states = NULL;
- ret->nbStates = 0;
- if (ret->atoms != NULL) {
- for (i = 0;i < ret->nbAtoms;i++)
- xmlRegFreeAtom(ret->atoms[i]);
- xmlFree(ret->atoms);
- }
- ret->atoms = NULL;
- ret->nbAtoms = 0;
-
- ret->compact = transitions;
- ret->transdata = transdata;
- ret->stringMap = stringMap;
- ret->nbstrings = nbatoms;
- ret->nbstates = nbstates;
- xmlFree(stateRemap);
- xmlFree(stringRemap);
- }
-fail_compact:
- return(ret);
-}
-
-/**
- * xmlRegNewParserCtxt:
- * @string: the string to parse
- *
- * Allocate a new regexp parser context
- *
- * Returns the new context or NULL in case of error
- */
-static xmlRegParserCtxtPtr
-xmlRegNewParserCtxt(const xmlChar *string) {
- xmlRegParserCtxtPtr ret;
-
- ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
- if (ret == NULL)
- return(NULL);
- memset(ret, 0, sizeof(xmlRegParserCtxt));
- if (string != NULL)
- ret->string = xmlStrdup(string);
- ret->cur = ret->string;
- ret->neg = 0;
- ret->error = 0;
- ret->determinist = -1;
- return(ret);
-}
-
-/**
- * xmlRegNewRange:
- * @ctxt: the regexp parser context
- * @neg: is that negative
- * @type: the type of range
- * @start: the start codepoint
- * @end: the end codepoint
- *
- * Allocate a new regexp range
- *
- * Returns the new range or NULL in case of error
- */
-static xmlRegRangePtr
-xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
- int neg, xmlRegAtomType type, int start, int end) {
- xmlRegRangePtr ret;
-
- ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
- if (ret == NULL) {
- ERROR("failed to allocate regexp range");
- return(NULL);
- }
- ret->neg = neg;
- ret->type = type;
- ret->start = start;
- ret->end = end;
- return(ret);
-}
-
-/**
- * xmlRegFreeRange:
- * @range: the regexp range
- *
- * Free a regexp range
- */
-static void
-xmlRegFreeRange(xmlRegRangePtr range) {
- if (range == NULL)
- return;
-
- if (range->blockName != NULL)
- xmlFree(range->blockName);
- xmlFree(range);
-}
-
-/**
- * xmlRegNewAtom:
- * @ctxt: the regexp parser context
- * @type: the type of atom
- *
- * Allocate a new regexp range
- *
- * Returns the new atom or NULL in case of error
- */
-static xmlRegAtomPtr
-xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
- xmlRegAtomPtr ret;
-
- ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
- if (ret == NULL) {
- ERROR("failed to allocate regexp atom");
- return(NULL);
- }
- memset(ret, 0, sizeof(xmlRegAtom));
- ret->type = type;
- ret->quant = XML_REGEXP_QUANT_ONCE;
- ret->min = 0;
- ret->max = 0;
- return(ret);
-}
-
-/**
- * xmlRegFreeAtom:
- * @atom: the regexp atom
- *
- * Free a regexp atom
- */
-static void
-xmlRegFreeAtom(xmlRegAtomPtr atom) {
- int i;
-
- if (atom == NULL)
- return;
-
- for (i = 0;i < atom->nbRanges;i++)
- xmlRegFreeRange(atom->ranges[i]);
- if (atom->ranges != NULL)
- xmlFree(atom->ranges);
- if (atom->type == XML_REGEXP_STRING)
- xmlFree(atom->valuep);
- xmlFree(atom);
-}
-
-static xmlRegStatePtr
-xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
- xmlRegStatePtr ret;
-
- ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
- if (ret == NULL) {
- ERROR("failed to allocate regexp state");
- return(NULL);
- }
- memset(ret, 0, sizeof(xmlRegState));
- ret->type = XML_REGEXP_TRANS_STATE;
- ret->mark = XML_REGEXP_MARK_NORMAL;
- return(ret);
-}
-
-/**
- * xmlRegFreeState:
- * @state: the regexp state
- *
- * Free a regexp state
- */
-static void
-xmlRegFreeState(xmlRegStatePtr state) {
- if (state == NULL)
- return;
-
- if (state->trans != NULL)
- xmlFree(state->trans);
- xmlFree(state);
-}
-
-/**
- * xmlRegFreeParserCtxt:
- * @ctxt: the regexp parser context
- *
- * Free a regexp parser context
- */
-static void
-xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
- int i;
- if (ctxt == NULL)
- return;
-
- if (ctxt->string != NULL)
- xmlFree(ctxt->string);
- if (ctxt->states != NULL) {
- for (i = 0;i < ctxt->nbStates;i++)
- xmlRegFreeState(ctxt->states[i]);
- xmlFree(ctxt->states);
- }
- if (ctxt->atoms != NULL) {
- for (i = 0;i < ctxt->nbAtoms;i++)
- xmlRegFreeAtom(ctxt->atoms[i]);
- xmlFree(ctxt->atoms);
- }
- if (ctxt->counters != NULL)
- xmlFree(ctxt->counters);
- xmlFree(ctxt);
-}
-
-/************************************************************************
- * *
- * Display of Data structures *
- * *
- ************************************************************************/
-
-static void
-xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
- switch (type) {
- case XML_REGEXP_EPSILON:
- fprintf(output, "epsilon "); break;
- case XML_REGEXP_CHARVAL:
- fprintf(output, "charval "); break;
- case XML_REGEXP_RANGES:
- fprintf(output, "ranges "); break;
- case XML_REGEXP_SUBREG:
- fprintf(output, "subexpr "); break;
- case XML_REGEXP_STRING:
- fprintf(output, "string "); break;
- case XML_REGEXP_ANYCHAR:
- fprintf(output, "anychar "); break;
- case XML_REGEXP_ANYSPACE:
- fprintf(output, "anyspace "); break;
- case XML_REGEXP_NOTSPACE:
- fprintf(output, "notspace "); break;
- case XML_REGEXP_INITNAME:
- fprintf(output, "initname "); break;
- case XML_REGEXP_NOTINITNAME:
- fprintf(output, "notinitname "); break;
- case XML_REGEXP_NAMECHAR:
- fprintf(output, "namechar "); break;
- case XML_REGEXP_NOTNAMECHAR:
- fprintf(output, "notnamechar "); break;
- case XML_REGEXP_DECIMAL:
- fprintf(output, "decimal "); break;
- case XML_REGEXP_NOTDECIMAL:
- fprintf(output, "notdecimal "); break;
- case XML_REGEXP_REALCHAR:
- fprintf(output, "realchar "); break;
- case XML_REGEXP_NOTREALCHAR:
- fprintf(output, "notrealchar "); break;
- case XML_REGEXP_LETTER:
- fprintf(output, "LETTER "); break;
- case XML_REGEXP_LETTER_UPPERCASE:
- fprintf(output, "LETTER_UPPERCASE "); break;
- case XML_REGEXP_LETTER_LOWERCASE:
- fprintf(output, "LETTER_LOWERCASE "); break;
- case XML_REGEXP_LETTER_TITLECASE:
- fprintf(output, "LETTER_TITLECASE "); break;
- case XML_REGEXP_LETTER_MODIFIER:
- fprintf(output, "LETTER_MODIFIER "); break;
- case XML_REGEXP_LETTER_OTHERS:
- fprintf(output, "LETTER_OTHERS "); break;
- case XML_REGEXP_MARK:
- fprintf(output, "MARK "); break;
- case XML_REGEXP_MARK_NONSPACING:
- fprintf(output, "MARK_NONSPACING "); break;
- case XML_REGEXP_MARK_SPACECOMBINING:
- fprintf(output, "MARK_SPACECOMBINING "); break;
- case XML_REGEXP_MARK_ENCLOSING:
- fprintf(output, "MARK_ENCLOSING "); break;
- case XML_REGEXP_NUMBER:
- fprintf(output, "NUMBER "); break;
- case XML_REGEXP_NUMBER_DECIMAL:
- fprintf(output, "NUMBER_DECIMAL "); break;
- case XML_REGEXP_NUMBER_LETTER:
- fprintf(output, "NUMBER_LETTER "); break;
- case XML_REGEXP_NUMBER_OTHERS:
- fprintf(output, "NUMBER_OTHERS "); break;
- case XML_REGEXP_PUNCT:
- fprintf(output, "PUNCT "); break;
- case XML_REGEXP_PUNCT_CONNECTOR:
- fprintf(output, "PUNCT_CONNECTOR "); break;
- case XML_REGEXP_PUNCT_DASH:
- fprintf(output, "PUNCT_DASH "); break;
- case XML_REGEXP_PUNCT_OPEN:
- fprintf(output, "PUNCT_OPEN "); break;
- case XML_REGEXP_PUNCT_CLOSE:
- fprintf(output, "PUNCT_CLOSE "); break;
- case XML_REGEXP_PUNCT_INITQUOTE:
- fprintf(output, "PUNCT_INITQUOTE "); break;
- case XML_REGEXP_PUNCT_FINQUOTE:
- fprintf(output, "PUNCT_FINQUOTE "); break;
- case XML_REGEXP_PUNCT_OTHERS:
- fprintf(output, "PUNCT_OTHERS "); break;
- case XML_REGEXP_SEPAR:
- fprintf(output, "SEPAR "); break;
- case XML_REGEXP_SEPAR_SPACE:
- fprintf(output, "SEPAR_SPACE "); break;
- case XML_REGEXP_SEPAR_LINE:
- fprintf(output, "SEPAR_LINE "); break;
- case XML_REGEXP_SEPAR_PARA:
- fprintf(output, "SEPAR_PARA "); break;
- case XML_REGEXP_SYMBOL:
- fprintf(output, "SYMBOL "); break;
- case XML_REGEXP_SYMBOL_MATH:
- fprintf(output, "SYMBOL_MATH "); break;
- case XML_REGEXP_SYMBOL_CURRENCY:
- fprintf(output, "SYMBOL_CURRENCY "); break;
- case XML_REGEXP_SYMBOL_MODIFIER:
- fprintf(output, "SYMBOL_MODIFIER "); break;
- case XML_REGEXP_SYMBOL_OTHERS:
- fprintf(output, "SYMBOL_OTHERS "); break;
- case XML_REGEXP_OTHER:
- fprintf(output, "OTHER "); break;
- case XML_REGEXP_OTHER_CONTROL:
- fprintf(output, "OTHER_CONTROL "); break;
- case XML_REGEXP_OTHER_FORMAT:
- fprintf(output, "OTHER_FORMAT "); break;
- case XML_REGEXP_OTHER_PRIVATE:
- fprintf(output, "OTHER_PRIVATE "); break;
- case XML_REGEXP_OTHER_NA:
- fprintf(output, "OTHER_NA "); break;
- case XML_REGEXP_BLOCK_NAME:
- fprintf(output, "BLOCK "); break;
- }
-}
-
-static void
-xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
- switch (type) {
- case XML_REGEXP_QUANT_EPSILON:
- fprintf(output, "epsilon "); break;
- case XML_REGEXP_QUANT_ONCE:
- fprintf(output, "once "); break;
- case XML_REGEXP_QUANT_OPT:
- fprintf(output, "? "); break;
- case XML_REGEXP_QUANT_MULT:
- fprintf(output, "* "); break;
- case XML_REGEXP_QUANT_PLUS:
- fprintf(output, "+ "); break;
- case XML_REGEXP_QUANT_RANGE:
- fprintf(output, "range "); break;
- case XML_REGEXP_QUANT_ONCEONLY:
- fprintf(output, "onceonly "); break;
- case XML_REGEXP_QUANT_ALL:
- fprintf(output, "all "); break;
- }
-}
-static void
-xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
- fprintf(output, " range: ");
- if (range->neg)
- fprintf(output, "negative ");
- xmlRegPrintAtomType(output, range->type);
- fprintf(output, "%c - %c\n", range->start, range->end);
-}
-
-static void
-xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
- fprintf(output, " atom: ");
- if (atom == NULL) {
- fprintf(output, "NULL\n");
- return;
- }
- xmlRegPrintAtomType(output, atom->type);
- xmlRegPrintQuantType(output, atom->quant);
- if (atom->quant == XML_REGEXP_QUANT_RANGE)
- fprintf(output, "%d-%d ", atom->min, atom->max);
- if (atom->type == XML_REGEXP_STRING)
- fprintf(output, "'%s' ", (char *) atom->valuep);
- if (atom->type == XML_REGEXP_CHARVAL)
- fprintf(output, "char %c\n", atom->codepoint);
- else if (atom->type == XML_REGEXP_RANGES) {
- int i;
- fprintf(output, "%d entries\n", atom->nbRanges);
- for (i = 0; i < atom->nbRanges;i++)
- xmlRegPrintRange(output, atom->ranges[i]);
- } else if (atom->type == XML_REGEXP_SUBREG) {
- fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
- } else {
- fprintf(output, "\n");
- }
-}
-
-static void
-xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
- fprintf(output, " trans: ");
- if (trans == NULL) {
- fprintf(output, "NULL\n");
- return;
- }
- if (trans->to < 0) {
- fprintf(output, "removed\n");
- return;
- }
- if (trans->counter >= 0) {
- fprintf(output, "counted %d, ", trans->counter);
- }
- if (trans->count == REGEXP_ALL_COUNTER) {
- fprintf(output, "all transition, ");
- } else if (trans->count >= 0) {
- fprintf(output, "count based %d, ", trans->count);
- }
- if (trans->atom == NULL) {
- fprintf(output, "epsilon to %d\n", trans->to);
- return;
- }
- if (trans->atom->type == XML_REGEXP_CHARVAL)
- fprintf(output, "char %c ", trans->atom->codepoint);
- fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
-}
-
-static void
-xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
- int i;
-
- fprintf(output, " state: ");
- if (state == NULL) {
- fprintf(output, "NULL\n");
- return;
- }
- if (state->type == XML_REGEXP_START_STATE)
- fprintf(output, "START ");
- if (state->type == XML_REGEXP_FINAL_STATE)
- fprintf(output, "FINAL ");
-
- fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
- for (i = 0;i < state->nbTrans; i++) {
- xmlRegPrintTrans(output, &(state->trans[i]));
- }
-}
-
-#ifdef DEBUG_REGEXP_GRAPH
-static void
-xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
- int i;
-
- fprintf(output, " ctxt: ");
- if (ctxt == NULL) {
- fprintf(output, "NULL\n");
- return;
- }
- fprintf(output, "'%s' ", ctxt->string);
- if (ctxt->error)
- fprintf(output, "error ");
- if (ctxt->neg)
- fprintf(output, "neg ");
- fprintf(output, "\n");
- fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
- for (i = 0;i < ctxt->nbAtoms; i++) {
- fprintf(output, " %02d ", i);
- xmlRegPrintAtom(output, ctxt->atoms[i]);
- }
- if (ctxt->atom != NULL) {
- fprintf(output, "current atom:\n");
- xmlRegPrintAtom(output, ctxt->atom);
- }
- fprintf(output, "%d states:", ctxt->nbStates);
- if (ctxt->start != NULL)
- fprintf(output, " start: %d", ctxt->start->no);
- if (ctxt->end != NULL)
- fprintf(output, " end: %d", ctxt->end->no);
- fprintf(output, "\n");
- for (i = 0;i < ctxt->nbStates; i++) {
- xmlRegPrintState(output, ctxt->states[i]);
- }
- fprintf(output, "%d counters:\n", ctxt->nbCounters);
- for (i = 0;i < ctxt->nbCounters; i++) {
- fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
- ctxt->counters[i].max);
- }
-}
-#endif
-
-/************************************************************************
- * *
- * Finite Automata structures manipulations *
- * *
- ************************************************************************/
-
-static void
-xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
- int neg, xmlRegAtomType type, int start, int end,
- xmlChar *blockName) {
- xmlRegRangePtr range;
-
- if (atom == NULL) {
- ERROR("add range: atom is NULL");
- return;
- }
- if (atom->type != XML_REGEXP_RANGES) {
- ERROR("add range: atom is not ranges");
- return;
- }
- if (atom->maxRanges == 0) {
- atom->maxRanges = 4;
- atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
- sizeof(xmlRegRangePtr));
- if (atom->ranges == NULL) {
- ERROR("add range: allocation failed");
- atom->maxRanges = 0;
- return;
- }
- } else if (atom->nbRanges >= atom->maxRanges) {
- xmlRegRangePtr *tmp;
- atom->maxRanges *= 2;
- tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
- sizeof(xmlRegRangePtr));
- if (tmp == NULL) {
- ERROR("add range: allocation failed");
- atom->maxRanges /= 2;
- return;
- }
- atom->ranges = tmp;
- }
- range = xmlRegNewRange(ctxt, neg, type, start, end);
- if (range == NULL)
- return;
- range->blockName = blockName;
- atom->ranges[atom->nbRanges++] = range;
-
-}
-
-static int
-xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
- if (ctxt->maxCounters == 0) {
- ctxt->maxCounters = 4;
- ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
- sizeof(xmlRegCounter));
- if (ctxt->counters == NULL) {
- ERROR("reg counter: allocation failed");
- ctxt->maxCounters = 0;
- return(-1);
- }
- } else if (ctxt->nbCounters >= ctxt->maxCounters) {
- xmlRegCounter *tmp;
- ctxt->maxCounters *= 2;
- tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
- sizeof(xmlRegCounter));
- if (tmp == NULL) {
- ERROR("reg counter: allocation failed");
- ctxt->maxCounters /= 2;
- return(-1);
- }
- ctxt->counters = tmp;
- }
- ctxt->counters[ctxt->nbCounters].min = -1;
- ctxt->counters[ctxt->nbCounters].max = -1;
- return(ctxt->nbCounters++);
-}
-
-static void
-xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
- if (atom == NULL) {
- ERROR("atom push: atom is NULL");
- return;
- }
- if (ctxt->maxAtoms == 0) {
- ctxt->maxAtoms = 4;
- ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
- sizeof(xmlRegAtomPtr));
- if (ctxt->atoms == NULL) {
- ERROR("atom push: allocation failed");
- ctxt->maxAtoms = 0;
- return;
- }
- } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
- xmlRegAtomPtr *tmp;
- ctxt->maxAtoms *= 2;
- tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
- sizeof(xmlRegAtomPtr));
- if (tmp == NULL) {
- ERROR("atom push: allocation failed");
- ctxt->maxAtoms /= 2;
- return;
- }
- ctxt->atoms = tmp;
- }
- atom->no = ctxt->nbAtoms;
- ctxt->atoms[ctxt->nbAtoms++] = atom;
-}
-
-static void
-xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
- xmlRegAtomPtr atom, xmlRegStatePtr target,
- int counter, int count) {
- if (state == NULL) {
- ERROR("add state: state is NULL");
- return;
- }
- if (target == NULL) {
- ERROR("add state: target is NULL");
- return;
- }
- if (state->maxTrans == 0) {
- state->maxTrans = 4;
- state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
- sizeof(xmlRegTrans));
- if (state->trans == NULL) {
- ERROR("add range: allocation failed");
- state->maxTrans = 0;
- return;
- }
- } else if (state->nbTrans >= state->maxTrans) {
- xmlRegTrans *tmp;
- state->maxTrans *= 2;
- tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
- sizeof(xmlRegTrans));
- if (tmp == NULL) {
- ERROR("add range: allocation failed");
- state->maxTrans /= 2;
- return;
- }
- state->trans = tmp;
- }
-#ifdef DEBUG_REGEXP_GRAPH
- printf("Add trans from %d to %d ", state->no, target->no);
- if (count == REGEXP_ALL_COUNTER)
- printf("all transition");
- else if (count >= 0)
- printf("count based %d", count);
- else if (counter >= 0)
- printf("counted %d", counter);
- else if (atom == NULL)
- printf("epsilon transition");
- printf("\n");
-#endif
-
- state->trans[state->nbTrans].atom = atom;
- state->trans[state->nbTrans].to = target->no;
- state->trans[state->nbTrans].counter = counter;
- state->trans[state->nbTrans].count = count;
- state->nbTrans++;
-}
-
-static void
-xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
- if (ctxt->maxStates == 0) {
- ctxt->maxStates = 4;
- ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
- sizeof(xmlRegStatePtr));
- if (ctxt->states == NULL) {
- ERROR("add range: allocation failed");
- ctxt->maxStates = 0;
- return;
- }
- } else if (ctxt->nbStates >= ctxt->maxStates) {
- xmlRegStatePtr *tmp;
- ctxt->maxStates *= 2;
- tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
- sizeof(xmlRegStatePtr));
- if (tmp == NULL) {
- ERROR("add range: allocation failed");
- ctxt->maxStates /= 2;
- return;
- }
- ctxt->states = tmp;
- }
- state->no = ctxt->nbStates;
- ctxt->states[ctxt->nbStates++] = state;
-}
-
-/**
- * xmlFAGenerateAllTransition:
- * @ctxt: a regexp parser context
- * @from: the from state
- * @to: the target state or NULL for building a new one
- * @lax:
- *
- */
-static void
-xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
- xmlRegStatePtr from, xmlRegStatePtr to,
- int lax) {
- if (to == NULL) {
- to = xmlRegNewState(ctxt);
- xmlRegStatePush(ctxt, to);
- ctxt->state = to;
- }
- if (lax)
- xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
- else
- xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
-}
-
-/**
- * xmlFAGenerateEpsilonTransition:
- * @ctxt: a regexp parser context
- * @from: the from state
- * @to: the target state or NULL for building a new one
- *
- */
-static void
-xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
- xmlRegStatePtr from, xmlRegStatePtr to) {
- if (to == NULL) {
- to = xmlRegNewState(ctxt);
- xmlRegStatePush(ctxt, to);
- ctxt->state = to;
- }
- xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
-}
-
-/**
- * xmlFAGenerateCountedEpsilonTransition:
- * @ctxt: a regexp parser context
- * @from: the from state
- * @to: the target state or NULL for building a new one
- * counter: the counter for that transition
- *
- */
-static void
-xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
- xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
- if (to == NULL) {
- to = xmlRegNewState(ctxt);
- xmlRegStatePush(ctxt, to);
- ctxt->state = to;
- }
- xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
-}
-
-/**
- * xmlFAGenerateCountedTransition:
- * @ctxt: a regexp parser context
- * @from: the from state
- * @to: the target state or NULL for building a new one
- * counter: the counter for that transition
- *
- */
-static void
-xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
- xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
- if (to == NULL) {
- to = xmlRegNewState(ctxt);
- xmlRegStatePush(ctxt, to);
- ctxt->state = to;
- }
- xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
-}
-
-/**
- * xmlFAGenerateTransitions:
- * @ctxt: a regexp parser context
- * @from: the from state
- * @to: the target state or NULL for building a new one
- * @atom: the atom generating the transition
- *
- */
-static void
-xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
- xmlRegStatePtr to, xmlRegAtomPtr atom) {
- if (atom == NULL) {
- ERROR("genrate transition: atom == NULL");
- return;
- }
- if (atom->type == XML_REGEXP_SUBREG) {
- /*
- * this is a subexpression handling one should not need to
- * create a new node excep for XML_REGEXP_QUANT_RANGE.
- */
- xmlRegAtomPush(ctxt, atom);
- if ((to != NULL) && (atom->stop != to) &&
- (atom->quant != XML_REGEXP_QUANT_RANGE)) {
- /*
- * Generate an epsilon transition to link to the target
- */
- xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
- }
- switch (atom->quant) {
- case XML_REGEXP_QUANT_OPT:
- atom->quant = XML_REGEXP_QUANT_ONCE;
- xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
- break;
- case XML_REGEXP_QUANT_MULT:
- atom->quant = XML_REGEXP_QUANT_ONCE;
- xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
- xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
- break;
- case XML_REGEXP_QUANT_PLUS:
- atom->quant = XML_REGEXP_QUANT_ONCE;
- xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
- break;
- case XML_REGEXP_QUANT_RANGE: {
- int counter;
- xmlRegStatePtr newstate;
-
- /*
- * This one is nasty:
- * 1/ register a new counter
- * 2/ register an epsilon transition associated to
- * this counter going from atom->stop to atom->start
- * 3/ create a new state
- * 4/ generate a counted transition from atom->stop to
- * that state
- */
- counter = xmlRegGetCounter(ctxt);
- ctxt->counters[counter].min = atom->min - 1;
- ctxt->counters[counter].max = atom->max - 1;
- atom->min = 0;
- atom->max = 0;
- atom->quant = XML_REGEXP_QUANT_ONCE;
- xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
- atom->start, counter);
- if (to != NULL) {
- newstate = to;
- } else {
- newstate = xmlRegNewState(ctxt);
- xmlRegStatePush(ctxt, newstate);
- ctxt->state = newstate;
- }
- xmlFAGenerateCountedTransition(ctxt, atom->stop,
- newstate, counter);
- }
- default:
- break;
- }
- return;
- } else {
- if (to == NULL) {
- to = xmlRegNewState(ctxt);
- xmlRegStatePush(ctxt, to);
- }
- xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
- xmlRegAtomPush(ctxt, atom);
- ctxt->state = to;
- }
- switch (atom->quant) {
- case XML_REGEXP_QUANT_OPT:
- atom->quant = XML_REGEXP_QUANT_ONCE;
- xmlFAGenerateEpsilonTransition(ctxt, from, to);
- break;
- case XML_REGEXP_QUANT_MULT:
- atom->quant = XML_REGEXP_QUANT_ONCE;
- xmlFAGenerateEpsilonTransition(ctxt, from, to);
- xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
- break;
- case XML_REGEXP_QUANT_PLUS:
- atom->quant = XML_REGEXP_QUANT_ONCE;
- xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
- break;
- default:
- break;
- }
-}
-
-/**
- * xmlFAReduceEpsilonTransitions:
- * @ctxt: a regexp parser context
- * @fromnr: the from state
- * @tonr: the to state
- * @cpunter: should that transition be associted to a counted
- *
- */
-static void
-xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
- int tonr, int counter) {
- int transnr;
- xmlRegStatePtr from;
- xmlRegStatePtr to;
-
-#ifdef DEBUG_REGEXP_GRAPH
- printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
-#endif
- from = ctxt->states[fromnr];
- if (from == NULL)
- return;
- to = ctxt->states[tonr];
- if (to == NULL)
- return;
- if ((to->mark == XML_REGEXP_MARK_START) ||
- (to->mark == XML_REGEXP_MARK_VISITED))
- return;
-
- to->mark = XML_REGEXP_MARK_VISITED;
- if (to->type == XML_REGEXP_FINAL_STATE) {
-#ifdef DEBUG_REGEXP_GRAPH
- printf("State %d is final, so %d becomes final\n", tonr, fromnr);
-#endif
- from->type = XML_REGEXP_FINAL_STATE;
- }
- for (transnr = 0;transnr < to->nbTrans;transnr++) {
- if (to->trans[transnr].atom == NULL) {
- /*
- * Don't remove counted transitions
- * Don't loop either
- */
- if (to->trans[transnr].to != fromnr) {
- if (to->trans[transnr].count >= 0) {
- int newto = to->trans[transnr].to;
-
- xmlRegStateAddTrans(ctxt, from, NULL,
- ctxt->states[newto],
- -1, to->trans[transnr].count);
- } else {
-#ifdef DEBUG_REGEXP_GRAPH
- printf("Found epsilon trans %d from %d to %d\n",
- transnr, tonr, to->trans[transnr].to);
-#endif
- if (to->trans[transnr].counter >= 0) {
- xmlFAReduceEpsilonTransitions(ctxt, fromnr,
- to->trans[transnr].to,
- to->trans[transnr].counter);
- } else {
- xmlFAReduceEpsilonTransitions(ctxt, fromnr,
- to->trans[transnr].to,
- counter);
- }
- }
- }
- } else {
- int newto = to->trans[transnr].to;
-
- if (to->trans[transnr].counter >= 0) {
- xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
- ctxt->states[newto],
- to->trans[transnr].counter, -1);
- } else {
- xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
- ctxt->states[newto], counter, -1);
- }
- }
- }
- to->mark = XML_REGEXP_MARK_NORMAL;
-}
-
-/**
- * xmlFAEliminateEpsilonTransitions:
- * @ctxt: a regexp parser context
- *
- */
-static void
-xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
- int statenr, transnr;
- xmlRegStatePtr state;
-
- /*
- * build the completed transitions bypassing the epsilons
- * Use a marking algorithm to avoid loops
- */
- for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
- state = ctxt->states[statenr];
- if (state == NULL)
- continue;
- for (transnr = 0;transnr < state->nbTrans;transnr++) {
- if ((state->trans[transnr].atom == NULL) &&
- (state->trans[transnr].to >= 0)) {
- if (state->trans[transnr].to == statenr) {
- state->trans[transnr].to = -1;
-#ifdef DEBUG_REGEXP_GRAPH
- printf("Removed loopback epsilon trans %d on %d\n",
- transnr, statenr);
-#endif
- } else if (state->trans[transnr].count < 0) {
- int newto = state->trans[transnr].to;
-
-#ifdef DEBUG_REGEXP_GRAPH
- printf("Found epsilon trans %d from %d to %d\n",
- transnr, statenr, newto);
-#endif
- state->mark = XML_REGEXP_MARK_START;
- xmlFAReduceEpsilonTransitions(ctxt, statenr,
- newto, state->trans[transnr].counter);
- state->mark = XML_REGEXP_MARK_NORMAL;
-#ifdef DEBUG_REGEXP_GRAPH
- } else {
- printf("Found counted transition %d on %d\n",
- transnr, statenr);
-#endif
- }
- }
- }
- }
- /*
- * Eliminate the epsilon transitions
- */
- for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
- state = ctxt->states[statenr];
- if (state == NULL)
- continue;
- for (transnr = 0;transnr < state->nbTrans;transnr++) {
- if ((state->trans[transnr].atom == NULL) &&
- (state->trans[transnr].count < 0) &&
- (state->trans[transnr].to >= 0)) {
- state->trans[transnr].to = -1;
- }
- }
- }
-
- /*
- * Use this pass to detect unreachable states too
- */
- for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
- state = ctxt->states[statenr];
- if (state != NULL)
- state->reached = 0;
- }
- state = ctxt->states[0];
- if (state != NULL)
- state->reached = 1;
- while (state != NULL) {
- xmlRegStatePtr target = NULL;
- state->reached = 2;
- /*
- * Mark all state reachable from the current reachable state
- */
- for (transnr = 0;transnr < state->nbTrans;transnr++) {
- if ((state->trans[transnr].to >= 0) &&
- ((state->trans[transnr].atom != NULL) ||
- (state->trans[transnr].count >= 0))) {
- int newto = state->trans[transnr].to;
-
- if (ctxt->states[newto] == NULL)
- continue;
- if (ctxt->states[newto]->reached == 0) {
- ctxt->states[newto]->reached = 1;
- target = ctxt->states[newto];
- }
- }
- }
- /*
- * find the next accessible state not explored
- */
- if (target == NULL) {
- for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
- state = ctxt->states[statenr];
- if ((state != NULL) && (state->reached == 1)) {
- target = state;
- break;
- }
- }
- }
- state = target;
- }
- for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
- state = ctxt->states[statenr];
- if ((state != NULL) && (state->reached == 0)) {
-#ifdef DEBUG_REGEXP_GRAPH
- printf("Removed unreachable state %d\n", statenr);
-#endif
- xmlRegFreeState(state);
- ctxt->states[statenr] = NULL;
- }
- }
-
-}
-
-/**
- * xmlFACompareAtoms:
- * @atom1: an atom
- * @atom2: an atom
- *
- * Compares two atoms to check whether they are equivatents
- *
- * Returns 1 if yes and 0 otherwise
- */
-static int
-xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
- if (atom1 == atom2)
- return(1);
- if ((atom1 == NULL) || (atom2 == NULL))
- return(0);
-
- if (atom1->type != atom2->type)
- return(0);
- switch (atom1->type) {
- case XML_REGEXP_STRING:
- return(xmlStrEqual((xmlChar *)atom1->valuep,
- (xmlChar *)atom2->valuep));
- case XML_REGEXP_EPSILON:
- return(1);
- case XML_REGEXP_CHARVAL:
- return(atom1->codepoint == atom2->codepoint);
- case XML_REGEXP_RANGES:
- TODO;
- return(0);
- default:
- break;
- }
- return(1);
-}
-
-/**
- * xmlFARecurseDeterminism:
- * @ctxt: a regexp parser context
- *
- * Check whether the associated regexp is determinist,
- * should be called after xmlFAEliminateEpsilonTransitions()
- *
- */
-static int
-xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
- int to, xmlRegAtomPtr atom) {
- int ret = 1;
- int transnr;
- xmlRegTransPtr t1;
-
- if (state == NULL)
- return(ret);
- for (transnr = 0;transnr < state->nbTrans;transnr++) {
- t1 = &(state->trans[transnr]);
- /*
- * check transitions conflicting with the one looked at
- */
- if (t1->atom == NULL) {
- if (t1->to == -1)
- continue;
- ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
- to, atom);
- if (ret == 0)
- return(0);
- continue;
- }
- if (t1->to != to)
- continue;
- if (xmlFACompareAtoms(t1->atom, atom))
- return(0);
- }
- return(ret);
-}
-
-/**
- * xmlFAComputesDeterminism:
- * @ctxt: a regexp parser context
- *
- * Check whether the associated regexp is determinist,
- * should be called after xmlFAEliminateEpsilonTransitions()
- *
- */
-static int
-xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
- int statenr, transnr;
- xmlRegStatePtr state;
- xmlRegTransPtr t1, t2;
- int i;
- int ret = 1;
-
-#ifdef DEBUG_REGEXP_GRAPH
- printf("xmlFAComputesDeterminism\n");
- xmlRegPrintCtxt(stdout, ctxt);
-#endif
- if (ctxt->determinist != -1)
- return(ctxt->determinist);
-
- /*
- * Check for all states that there isn't 2 transitions
- * with the same atom and a different target.
- */
- for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
- state = ctxt->states[statenr];
- if (state == NULL)
- continue;
- for (transnr = 0;transnr < state->nbTrans;transnr++) {
- t1 = &(state->trans[transnr]);
- /*
- * Determinism checks in case of counted or all transitions
- * will have to be handled separately
- */
- if (t1->atom == NULL)
- continue;
- if (t1->to == -1) /* eliminated */
- continue;
- for (i = 0;i < transnr;i++) {
- t2 = &(state->trans[i]);
- if (t2->to == -1) /* eliminated */
- continue;
- if (t2->atom != NULL) {
- if (t1->to == t2->to) {
- if (xmlFACompareAtoms(t1->atom, t2->atom))
- t2->to = -1; /* eliminate */
- } else {
- /* not determinist ! */
- if (xmlFACompareAtoms(t1->atom, t2->atom))
- ret = 0;
- }
- } else if (t1->to != -1) {
- /*
- * do the closure in case of remaining specific
- * epsilon transitions like choices or all
- */
- ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
- t2->to, t2->atom);
- if (ret == 0)
- return(0);
- }
- }
- if (ret == 0)
- break;
- }
- if (ret == 0)
- break;
- }
- ctxt->determinist = ret;
- return(ret);
-}
-
-/************************************************************************
- * *
- * Routines to check input against transition atoms *
- * *
- ************************************************************************/
-
-static int
-xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
- int start, int end, const xmlChar *blockName) {
- int ret = 0;
-
- switch (type) {
- case XML_REGEXP_STRING:
- case XML_REGEXP_SUBREG:
- case XML_REGEXP_RANGES:
- case XML_REGEXP_EPSILON:
- return(-1);
- case XML_REGEXP_ANYCHAR:
- ret = ((codepoint != '\n') && (codepoint != '\r'));
- break;
- case XML_REGEXP_CHARVAL:
- ret = ((codepoint >= start) && (codepoint <= end));
- break;
- case XML_REGEXP_NOTSPACE:
- neg = !neg;
- case XML_REGEXP_ANYSPACE:
- ret = ((codepoint == '\n') || (codepoint == '\r') ||
- (codepoint == '\t') || (codepoint == ' '));
- break;
- case XML_REGEXP_NOTINITNAME:
- neg = !neg;
- case XML_REGEXP_INITNAME:
- ret = (xmlIsLetter(codepoint) ||
- (codepoint == '_') || (codepoint == ':'));
- break;
- case XML_REGEXP_NOTNAMECHAR:
- neg = !neg;
- case XML_REGEXP_NAMECHAR:
- ret = (xmlIsLetter(codepoint) || xmlIsDigit(codepoint) ||
- (codepoint == '.') || (codepoint == '-') ||
- (codepoint == '_') || (codepoint == ':') ||
- xmlIsCombining(codepoint) || xmlIsExtender(codepoint));
- break;
- case XML_REGEXP_NOTDECIMAL:
- neg = !neg;
- case XML_REGEXP_DECIMAL:
- ret = xmlUCSIsCatNd(codepoint);
- break;
- case XML_REGEXP_REALCHAR:
- neg = !neg;
- case XML_REGEXP_NOTREALCHAR:
- ret = xmlUCSIsCatP(codepoint);
- if (ret == 0)
- ret = xmlUCSIsCatZ(codepoint);
- if (ret == 0)
- ret = xmlUCSIsCatC(codepoint);
- break;
- case XML_REGEXP_LETTER:
- ret = xmlUCSIsCatL(codepoint);
- break;
- case XML_REGEXP_LETTER_UPPERCASE:
- ret = xmlUCSIsCatLu(codepoint);
- break;
- case XML_REGEXP_LETTER_LOWERCASE:
- ret = xmlUCSIsCatLl(codepoint);
- break;
- case XML_REGEXP_LETTER_TITLECASE:
- ret = xmlUCSIsCatLt(codepoint);
- break;
- case XML_REGEXP_LETTER_MODIFIER:
- ret = xmlUCSIsCatLm(codepoint);
- break;
- case XML_REGEXP_LETTER_OTHERS:
- ret = xmlUCSIsCatLo(codepoint);
- break;
- case XML_REGEXP_MARK:
- ret = xmlUCSIsCatM(codepoint);
- break;
- case XML_REGEXP_MARK_NONSPACING:
- ret = xmlUCSIsCatMn(codepoint);
- break;
- case XML_REGEXP_MARK_SPACECOMBINING:
- ret = xmlUCSIsCatMc(codepoint);
- break;
- case XML_REGEXP_MARK_ENCLOSING:
- ret = xmlUCSIsCatMe(codepoint);
- break;
- case XML_REGEXP_NUMBER:
- ret = xmlUCSIsCatN(codepoint);
- break;
- case XML_REGEXP_NUMBER_DECIMAL:
- ret = xmlUCSIsCatNd(codepoint);
- break;
- case XML_REGEXP_NUMBER_LETTER:
- ret = xmlUCSIsCatNl(codepoint);
- break;
- case XML_REGEXP_NUMBER_OTHERS:
- ret = xmlUCSIsCatNo(codepoint);
- break;
- case XML_REGEXP_PUNCT:
- ret = xmlUCSIsCatP(codepoint);
- break;
- case XML_REGEXP_PUNCT_CONNECTOR:
- ret = xmlUCSIsCatPc(codepoint);
- break;
- case XML_REGEXP_PUNCT_DASH:
- ret = xmlUCSIsCatPd(codepoint);
- break;
- case XML_REGEXP_PUNCT_OPEN:
- ret = xmlUCSIsCatPs(codepoint);
- break;
- case XML_REGEXP_PUNCT_CLOSE:
- ret = xmlUCSIsCatPe(codepoint);
- break;
- case XML_REGEXP_PUNCT_INITQUOTE:
- ret = xmlUCSIsCatPi(codepoint);
- break;
- case XML_REGEXP_PUNCT_FINQUOTE:
- ret = xmlUCSIsCatPf(codepoint);
- break;
- case XML_REGEXP_PUNCT_OTHERS:
- ret = xmlUCSIsCatPo(codepoint);
- break;
- case XML_REGEXP_SEPAR:
- ret = xmlUCSIsCatZ(codepoint);
- break;
- case XML_REGEXP_SEPAR_SPACE:
- ret = xmlUCSIsCatZs(codepoint);
- break;
- case XML_REGEXP_SEPAR_LINE:
- ret = xmlUCSIsCatZl(codepoint);
- break;
- case XML_REGEXP_SEPAR_PARA:
- ret = xmlUCSIsCatZp(codepoint);
- break;
- case XML_REGEXP_SYMBOL:
- ret = xmlUCSIsCatS(codepoint);
- break;
- case XML_REGEXP_SYMBOL_MATH:
- ret = xmlUCSIsCatSm(codepoint);
- break;
- case XML_REGEXP_SYMBOL_CURRENCY:
- ret = xmlUCSIsCatSc(codepoint);
- break;
- case XML_REGEXP_SYMBOL_MODIFIER:
- ret = xmlUCSIsCatSk(codepoint);
- break;
- case XML_REGEXP_SYMBOL_OTHERS:
- ret = xmlUCSIsCatSo(codepoint);
- break;
- case XML_REGEXP_OTHER:
- ret = xmlUCSIsCatC(codepoint);
- break;
- case XML_REGEXP_OTHER_CONTROL:
- ret = xmlUCSIsCatCc(codepoint);
- break;
- case XML_REGEXP_OTHER_FORMAT:
- ret = xmlUCSIsCatCf(codepoint);
- break;
- case XML_REGEXP_OTHER_PRIVATE:
- ret = xmlUCSIsCatCo(codepoint);
- break;
- case XML_REGEXP_OTHER_NA:
- /* ret = xmlUCSIsCatCn(codepoint); */
- /* Seems it doesn't exist anymore in recent Unicode releases */
- ret = 0;
- break;
- case XML_REGEXP_BLOCK_NAME:
- ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
- break;
- }
- if (neg)
- return(!ret);
- return(ret);
-}
-
-static int
-xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
- int i, ret = 0;
- xmlRegRangePtr range;
-
- if ((atom == NULL) || (!xmlIsChar(codepoint)))
- return(-1);
-
- switch (atom->type) {
- case XML_REGEXP_SUBREG:
- case XML_REGEXP_EPSILON:
- return(-1);
- case XML_REGEXP_CHARVAL:
- return(codepoint == atom->codepoint);
- case XML_REGEXP_RANGES: {
- int accept = 0;
- for (i = 0;i < atom->nbRanges;i++) {
- range = atom->ranges[i];
- if (range->neg) {
- ret = xmlRegCheckCharacterRange(range->type, codepoint,
- 0, range->start, range->end,
- range->blockName);
- if (ret != 0)
- return(0); /* excluded char */
- } else {
- ret = xmlRegCheckCharacterRange(range->type, codepoint,
- 0, range->start, range->end,
- range->blockName);
- if (ret != 0)
- accept = 1; /* might still be excluded */
- }
- }
- return(accept);
- }
- case XML_REGEXP_STRING:
- printf("TODO: XML_REGEXP_STRING\n");
- return(-1);
- case XML_REGEXP_ANYCHAR:
- case XML_REGEXP_ANYSPACE:
- case XML_REGEXP_NOTSPACE:
- case XML_REGEXP_INITNAME:
- case XML_REGEXP_NOTINITNAME:
- case XML_REGEXP_NAMECHAR:
- case XML_REGEXP_NOTNAMECHAR:
- case XML_REGEXP_DECIMAL:
- case XML_REGEXP_NOTDECIMAL:
- case XML_REGEXP_REALCHAR:
- case XML_REGEXP_NOTREALCHAR:
- case XML_REGEXP_LETTER:
- case XML_REGEXP_LETTER_UPPERCASE:
- case XML_REGEXP_LETTER_LOWERCASE:
- case XML_REGEXP_LETTER_TITLECASE:
- case XML_REGEXP_LETTER_MODIFIER:
- case XML_REGEXP_LETTER_OTHERS:
- case XML_REGEXP_MARK:
- case XML_REGEXP_MARK_NONSPACING:
- case XML_REGEXP_MARK_SPACECOMBINING:
- case XML_REGEXP_MARK_ENCLOSING:
- case XML_REGEXP_NUMBER:
- case XML_REGEXP_NUMBER_DECIMAL:
- case XML_REGEXP_NUMBER_LETTER:
- case XML_REGEXP_NUMBER_OTHERS:
- case XML_REGEXP_PUNCT:
- case XML_REGEXP_PUNCT_CONNECTOR:
- case XML_REGEXP_PUNCT_DASH:
- case XML_REGEXP_PUNCT_OPEN:
- case XML_REGEXP_PUNCT_CLOSE:
- case XML_REGEXP_PUNCT_INITQUOTE:
- case XML_REGEXP_PUNCT_FINQUOTE:
- case XML_REGEXP_PUNCT_OTHERS:
- case XML_REGEXP_SEPAR:
- case XML_REGEXP_SEPAR_SPACE:
- case XML_REGEXP_SEPAR_LINE:
- case XML_REGEXP_SEPAR_PARA:
- case XML_REGEXP_SYMBOL:
- case XML_REGEXP_SYMBOL_MATH:
- case XML_REGEXP_SYMBOL_CURRENCY:
- case XML_REGEXP_SYMBOL_MODIFIER:
- case XML_REGEXP_SYMBOL_OTHERS:
- case XML_REGEXP_OTHER:
- case XML_REGEXP_OTHER_CONTROL:
- case XML_REGEXP_OTHER_FORMAT:
- case XML_REGEXP_OTHER_PRIVATE:
- case XML_REGEXP_OTHER_NA:
- case XML_REGEXP_BLOCK_NAME:
- ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
- (const xmlChar *)atom->valuep);
- if (atom->neg)
- ret = !ret;
- break;
- }
- return(ret);
-}
-
-/************************************************************************
- * *
- * Saving an restoring state of an execution context *
- * *
- ************************************************************************/
-
-#ifdef DEBUG_REGEXP_EXEC
-static void
-xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
- printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
- if (exec->inputStack != NULL) {
- int i;
- printf(": ");
- for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
- printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)]);
- } else {
- printf(": %s", &(exec->inputString[exec->index]));
- }
- printf("\n");
-}
-#endif
-
-static void
-xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
-#ifdef DEBUG_REGEXP_EXEC
- printf("saving ");
- exec->transno++;
- xmlFARegDebugExec(exec);
- exec->transno--;
-#endif
-
- if (exec->maxRollbacks == 0) {
- exec->maxRollbacks = 4;
- exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
- sizeof(xmlRegExecRollback));
- if (exec->rollbacks == NULL) {
- fprintf(stderr, "exec save: allocation failed");
- exec->maxRollbacks = 0;
- return;
- }
- memset(exec->rollbacks, 0,
- exec->maxRollbacks * sizeof(xmlRegExecRollback));
- } else if (exec->nbRollbacks >= exec->maxRollbacks) {
- xmlRegExecRollback *tmp;
- int len = exec->maxRollbacks;
-
- exec->maxRollbacks *= 2;
- tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
- exec->maxRollbacks * sizeof(xmlRegExecRollback));
- if (tmp == NULL) {
- fprintf(stderr, "exec save: allocation failed");
- exec->maxRollbacks /= 2;
- return;
- }
- exec->rollbacks = tmp;
- tmp = &exec->rollbacks[len];
- memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
- }
- exec->rollbacks[exec->nbRollbacks].state = exec->state;
- exec->rollbacks[exec->nbRollbacks].index = exec->index;
- exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
- if (exec->comp->nbCounters > 0) {
- if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
- exec->rollbacks[exec->nbRollbacks].counts = (int *)
- xmlMalloc(exec->comp->nbCounters * sizeof(int));
- if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
- fprintf(stderr, "exec save: allocation failed");
- exec->status = -5;
- return;
- }
- }
- memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
- exec->comp->nbCounters * sizeof(int));
- }
- exec->nbRollbacks++;
-}
-
-static void
-xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
- if (exec->nbRollbacks <= 0) {
- exec->status = -1;
-#ifdef DEBUG_REGEXP_EXEC
- printf("rollback failed on empty stack\n");
-#endif
- return;
- }
- exec->nbRollbacks--;
- exec->state = exec->rollbacks[exec->nbRollbacks].state;
- exec->index = exec->rollbacks[exec->nbRollbacks].index;
- exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
- if (exec->comp->nbCounters > 0) {
- if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
- fprintf(stderr, "exec save: allocation failed");
- exec->status = -6;
- return;
- }
- memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
- exec->comp->nbCounters * sizeof(int));
- }
-
-#ifdef DEBUG_REGEXP_EXEC
- printf("restored ");
- xmlFARegDebugExec(exec);
-#endif
-}
-
-/************************************************************************
- * *
- * Verifyer, running an input against a compiled regexp *
- * *
- ************************************************************************/
-
-static int
-xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
- xmlRegExecCtxt execval;
- xmlRegExecCtxtPtr exec = &execval;
- int ret, codepoint, len;
-
- exec->inputString = content;
- exec->index = 0;
- exec->determinist = 1;
- exec->maxRollbacks = 0;
- exec->nbRollbacks = 0;
- exec->rollbacks = NULL;
- exec->status = 0;
- exec->comp = comp;
- exec->state = comp->states[0];
- exec->transno = 0;
- exec->transcount = 0;
- if (comp->nbCounters > 0) {
- exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
- if (exec->counts == NULL)
- return(-1);
- memset(exec->counts, 0, comp->nbCounters * sizeof(int));
- } else
- exec->counts = NULL;
- while ((exec->status == 0) &&
- ((exec->inputString[exec->index] != 0) ||
- (exec->state->type != XML_REGEXP_FINAL_STATE))) {
- xmlRegTransPtr trans;
- xmlRegAtomPtr atom;
-
- /*
- * End of input on non-terminal state, rollback, however we may
- * still have epsilon like transition for counted transitions
- * on counters, in that case don't break too early.
- */
- if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
- goto rollback;
-
- exec->transcount = 0;
- for (;exec->transno < exec->state->nbTrans;exec->transno++) {
- trans = &exec->state->trans[exec->transno];
- if (trans->to < 0)
- continue;
- atom = trans->atom;
- ret = 0;
- if (trans->count >= 0) {
- int count;
- xmlRegCounterPtr counter;
-
- /*
- * A counted transition.
- */
-
- count = exec->counts[trans->count];
- counter = &exec->comp->counters[trans->count];
-#ifdef DEBUG_REGEXP_EXEC
- printf("testing count %d: val %d, min %d, max %d\n",
- trans->count, count, counter->min, counter->max);
-#endif
- ret = ((count >= counter->min) && (count <= counter->max));
- } else if (atom == NULL) {
- fprintf(stderr, "epsilon transition left at runtime\n");
- exec->status = -2;
- break;
- } else if (exec->inputString[exec->index] != 0) {
- codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
- ret = xmlRegCheckCharacter(atom, codepoint);
- if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
- xmlRegStatePtr to = comp->states[trans->to];
-
- /*
- * this is a multiple input sequence
- */
- if (exec->state->nbTrans > exec->transno + 1) {
- xmlFARegExecSave(exec);
- }
- exec->transcount = 1;
- do {
- /*
- * Try to progress as much as possible on the input
- */
- if (exec->transcount == atom->max) {
- break;
- }
- exec->index += len;
- /*
- * End of input: stop here
- */
- if (exec->inputString[exec->index] == 0) {
- exec->index -= len;
- break;
- }
- if (exec->transcount >= atom->min) {
- int transno = exec->transno;
- xmlRegStatePtr state = exec->state;
-
- /*
- * The transition is acceptable save it
- */
- exec->transno = -1; /* trick */
- exec->state = to;
- xmlFARegExecSave(exec);
- exec->transno = transno;
- exec->state = state;
- }
- codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
- len);
- ret = xmlRegCheckCharacter(atom, codepoint);
- exec->transcount++;
- } while (ret == 1);
- if (exec->transcount < atom->min)
- ret = 0;
-
- /*
- * If the last check failed but one transition was found
- * possible, rollback
- */
- if (ret < 0)
- ret = 0;
- if (ret == 0) {
- goto rollback;
- }
- }
- }
- if (ret == 1) {
- if (exec->state->nbTrans > exec->transno + 1) {
- xmlFARegExecSave(exec);
- }
- if (trans->counter >= 0) {
-#ifdef DEBUG_REGEXP_EXEC
- printf("Increasing count %d\n", trans->counter);
-#endif
- exec->counts[trans->counter]++;
- }
-#ifdef DEBUG_REGEXP_EXEC
- printf("entering state %d\n", trans->to);
-#endif
- exec->state = comp->states[trans->to];
- exec->transno = 0;
- if (trans->atom != NULL) {
- exec->index += len;
- }
- goto progress;
- } else if (ret < 0) {
- exec->status = -4;
- break;
- }
- }
- if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
-rollback:
- /*
- * Failed to find a way out
- */
- exec->determinist = 0;
- xmlFARegExecRollBack(exec);
- }
-progress:
- continue;
- }
- if (exec->rollbacks != NULL) {
- if (exec->counts != NULL) {
- int i;
-
- for (i = 0;i < exec->maxRollbacks;i++)
- if (exec->rollbacks[i].counts != NULL)
- xmlFree(exec->rollbacks[i].counts);
- }
- xmlFree(exec->rollbacks);
- }
- if (exec->counts != NULL)
- xmlFree(exec->counts);
- if (exec->status == 0)
- return(1);
- if (exec->status == -1)
- return(0);
- return(exec->status);
-}
-
-/************************************************************************
- * *
- * Progressive interface to the verifyer one atom at a time *
- * *
- ************************************************************************/
-
-/**
- * xmlRegNewExecCtxt:
- * @comp: a precompiled regular expression
- * @callback: a callback function used for handling progresses in the
- * automata matching phase
- * @data: the context data associated to the callback in this context
- *
- * Build a context used for progressive evaluation of a regexp.
- *
- * Returns the new context
- */
-xmlRegExecCtxtPtr
-xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
- xmlRegExecCtxtPtr exec;
-
- if (comp == NULL)
- return(NULL);
- exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
- if (exec == NULL) {
- return(NULL);
- }
- memset(exec, 0, sizeof(xmlRegExecCtxt));
- exec->inputString = NULL;
- exec->index = 0;
- exec->determinist = 1;
- exec->maxRollbacks = 0;
- exec->nbRollbacks = 0;
- exec->rollbacks = NULL;
- exec->status = 0;
- exec->comp = comp;
- if (comp->compact == NULL)
- exec->state = comp->states[0];
- exec->transno = 0;
- exec->transcount = 0;
- exec->callback = callback;
- exec->data = data;
- if (comp->nbCounters > 0) {
- exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
- if (exec->counts == NULL) {
- xmlFree(exec);
- return(NULL);
- }
- memset(exec->counts, 0, comp->nbCounters * sizeof(int));
- } else
- exec->counts = NULL;
- exec->inputStackMax = 0;
- exec->inputStackNr = 0;
- exec->inputStack = NULL;
- return(exec);
-}
-
-/**
- * xmlRegFreeExecCtxt:
- * @exec: a regular expression evaulation context
- *
- * Free the structures associated to a regular expression evaulation context.
- */
-void
-xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
- if (exec == NULL)
- return;
-
- if (exec->rollbacks != NULL) {
- if (exec->counts != NULL) {
- int i;
-
- for (i = 0;i < exec->maxRollbacks;i++)
- if (exec->rollbacks[i].counts != NULL)
- xmlFree(exec->rollbacks[i].counts);
- }
- xmlFree(exec->rollbacks);
- }
- if (exec->counts != NULL)
- xmlFree(exec->counts);
- if (exec->inputStack != NULL) {
- int i;
-
- for (i = 0;i < exec->inputStackNr;i++) {
- if (exec->inputStack[i].value != NULL)
- xmlFree(exec->inputStack[i].value);
- }
- xmlFree(exec->inputStack);
- }
- xmlFree(exec);
-}
-
-static void
-xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
- void *data) {
-#ifdef DEBUG_PUSH
- printf("saving value: %d:%s\n", exec->inputStackNr, value);
-#endif
- if (exec->inputStackMax == 0) {
- exec->inputStackMax = 4;
- exec->inputStack = (xmlRegInputTokenPtr)
- xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
- if (exec->inputStack == NULL) {
- fprintf(stderr, "push input: allocation failed");
- exec->inputStackMax = 0;
- return;
- }
- } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
- xmlRegInputTokenPtr tmp;
-
- exec->inputStackMax *= 2;
- tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
- exec->inputStackMax * sizeof(xmlRegInputToken));
- if (tmp == NULL) {
- fprintf(stderr, "push input: allocation failed");
- exec->inputStackMax /= 2;
- return;
- }
- exec->inputStack = tmp;
- }
- exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
- exec->inputStack[exec->inputStackNr].data = data;
- exec->inputStackNr++;
- exec->inputStack[exec->inputStackNr].value = NULL;
- exec->inputStack[exec->inputStackNr].data = NULL;
-}
-
-
-/**
- * xmlRegCompactPushString:
- * @exec: a regexp execution context
- * @comp: the precompiled exec with a compact table
- * @value: a string token input
- * @data: data associated to the token to reuse in callbacks
- *
- * Push one input token in the execution context
- *
- * Returns: 1 if the regexp reached a final state, 0 if non-final, and
- * a negative value in case of error.
- */
-static int
-xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
- xmlRegexpPtr comp,
- const xmlChar *value,
- void *data) {
- int state = exec->index;
- int i, target;
-
- if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
- return(-1);
-
- if (value == NULL) {
- /*
- * are we at a final state ?
- */
- if (comp->compact[state * (comp->nbstrings + 1)] ==
- XML_REGEXP_FINAL_STATE)
- return(1);
- return(0);
- }
-
-#ifdef DEBUG_PUSH
- printf("value pushed: %s\n", value);
-#endif
-
- /*
- * Examine all outside transition from current state
- */
- for (i = 0;i < comp->nbstrings;i++) {
- target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
- if ((target > 0) && (target <= comp->nbstates)) {
- target--; /* to avoid 0 */
- if (xmlStrEqual(comp->stringMap[i], value)) {
- exec->index = target;
- if ((exec->callback != NULL) && (comp->transdata != NULL)) {
- exec->callback(exec->data, value,
- comp->transdata[state * comp->nbstrings + i], data);
- }
-#ifdef DEBUG_PUSH
- printf("entering state %d\n", target);
-#endif
- if (comp->compact[target * (comp->nbstrings + 1)] ==
- XML_REGEXP_FINAL_STATE)
- return(1);
- return(0);
- }
- }
- }
- /*
- * Failed to find an exit transition out from current state for the
- * current token
- */
-#ifdef DEBUG_PUSH
- printf("failed to find a transition for %s on state %d\n", value, state);
-#endif
- exec->status = -1;
- return(-1);
-}
-
-/**
- * xmlRegExecPushString:
- * @exec: a regexp execution context
- * @value: a string token input
- * @data: data associated to the token to reuse in callbacks
- *
- * Push one input token in the execution context
- *
- * Returns: 1 if the regexp reached a final state, 0 if non-final, and
- * a negative value in case of error.
- */
-int
-xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
- void *data) {
- xmlRegTransPtr trans;
- xmlRegAtomPtr atom;
- int ret;
- int final = 0;
-
- if (exec == NULL)
- return(-1);
- if (exec->comp == NULL)
- return(-1);
- if (exec->status != 0)
- return(exec->status);
-
- if (exec->comp->compact != NULL)
- return(xmlRegCompactPushString(exec, exec->comp, value, data));
-
- if (value == NULL) {
- if (exec->state->type == XML_REGEXP_FINAL_STATE)
- return(1);
- final = 1;
- }
-
-#ifdef DEBUG_PUSH
- printf("value pushed: %s\n", value);
-#endif
- /*
- * If we have an active rollback stack push the new value there
- * and get back to where we were left
- */
- if ((value != NULL) && (exec->inputStackNr > 0)) {
- xmlFARegExecSaveInputString(exec, value, data);
- value = exec->inputStack[exec->index].value;
- data = exec->inputStack[exec->index].data;
-#ifdef DEBUG_PUSH
- printf("value loaded: %s\n", value);
-#endif
- }
-
- while ((exec->status == 0) &&
- ((value != NULL) ||
- ((final == 1) &&
- (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
-
- /*
- * End of input on non-terminal state, rollback, however we may
- * still have epsilon like transition for counted transitions
- * on counters, in that case don't break too early.
- */
- if ((value == NULL) && (exec->counts == NULL))
- goto rollback;
-
- exec->transcount = 0;
- for (;exec->transno < exec->state->nbTrans;exec->transno++) {
- trans = &exec->state->trans[exec->transno];
- if (trans->to < 0)
- continue;
- atom = trans->atom;
- ret = 0;
- if (trans->count == REGEXP_ALL_LAX_COUNTER) {
- int i;
- int count;
- xmlRegTransPtr t;
- xmlRegCounterPtr counter;
-
- ret = 0;
-
-#ifdef DEBUG_PUSH
- printf("testing all lax %d\n", trans->count);
-#endif
- /*
- * Check all counted transitions from the current state
- */
- if ((value == NULL) && (final)) {
- ret = 1;
- } else if (value != NULL) {
- for (i = 0;i < exec->state->nbTrans;i++) {
- t = &exec->state->trans[i];
- if ((t->counter < 0) || (t == trans))
- continue;
- counter = &exec->comp->counters[t->counter];
- count = exec->counts[t->counter];
- if ((count < counter->max) &&
- (t->atom != NULL) &&
- (xmlStrEqual(value, t->atom->valuep))) {
- ret = 0;
- break;
- }
- if ((count >= counter->min) &&
- (count < counter->max) &&
- (xmlStrEqual(value, t->atom->valuep))) {
- ret = 1;
- break;
- }
- }
- }
- } else if (trans->count == REGEXP_ALL_COUNTER) {
- int i;
- int count;
- xmlRegTransPtr t;
- xmlRegCounterPtr counter;
-
- ret = 1;
-
-#ifdef DEBUG_PUSH
- printf("testing all %d\n", trans->count);
-#endif
- /*
- * Check all counted transitions from the current state
- */
- for (i = 0;i < exec->state->nbTrans;i++) {
- t = &exec->state->trans[i];
- if ((t->counter < 0) || (t == trans))
- continue;
- counter = &exec->comp->counters[t->counter];
- count = exec->counts[t->counter];
- if ((count < counter->min) || (count > counter->max)) {
- ret = 0;
- break;
- }
- }
- } else if (trans->count >= 0) {
- int count;
- xmlRegCounterPtr counter;
-
- /*
- * A counted transition.
- */
-
- count = exec->counts[trans->count];
- counter = &exec->comp->counters[trans->count];
-#ifdef DEBUG_PUSH
- printf("testing count %d: val %d, min %d, max %d\n",
- trans->count, count, counter->min, counter->max);
-#endif
- ret = ((count >= counter->min) && (count <= counter->max));
- } else if (atom == NULL) {
- fprintf(stderr, "epsilon transition left at runtime\n");
- exec->status = -2;
- break;
- } else if (value != NULL) {
- ret = xmlStrEqual(value, atom->valuep);
- if ((ret == 1) && (trans->counter >= 0)) {
- xmlRegCounterPtr counter;
- int count;
-
- count = exec->counts[trans->counter];
- counter = &exec->comp->counters[trans->counter];
- if (count >= counter->max)
- ret = 0;
- }
-
- if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
- xmlRegStatePtr to = exec->comp->states[trans->to];
-
- /*
- * this is a multiple input sequence
- */
- if (exec->state->nbTrans > exec->transno + 1) {
- if (exec->inputStackNr <= 0) {
- xmlFARegExecSaveInputString(exec, value, data);
- }
- xmlFARegExecSave(exec);
- }
- exec->transcount = 1;
- do {
- /*
- * Try to progress as much as possible on the input
- */
- if (exec->transcount == atom->max) {
- break;
- }
- exec->index++;
- value = exec->inputStack[exec->index].value;
- data = exec->inputStack[exec->index].data;
-#ifdef DEBUG_PUSH
- printf("value loaded: %s\n", value);
-#endif
-
- /*
- * End of input: stop here
- */
- if (value == NULL) {
- exec->index --;
- break;
- }
- if (exec->transcount >= atom->min) {
- int transno = exec->transno;
- xmlRegStatePtr state = exec->state;
-
- /*
- * The transition is acceptable save it
- */
- exec->transno = -1; /* trick */
- exec->state = to;
- if (exec->inputStackNr <= 0) {
- xmlFARegExecSaveInputString(exec, value, data);
- }
- xmlFARegExecSave(exec);
- exec->transno = transno;
- exec->state = state;
- }
- ret = xmlStrEqual(value, atom->valuep);
- exec->transcount++;
- } while (ret == 1);
- if (exec->transcount < atom->min)
- ret = 0;
-
- /*
- * If the last check failed but one transition was found
- * possible, rollback
- */
- if (ret < 0)
- ret = 0;
- if (ret == 0) {
- goto rollback;
- }
- }
- }
- if (ret == 1) {
- if ((exec->callback != NULL) && (atom != NULL)) {
- exec->callback(exec->data, atom->valuep,
- atom->data, data);
- }
- if (exec->state->nbTrans > exec->transno + 1) {
- if (exec->inputStackNr <= 0) {
- xmlFARegExecSaveInputString(exec, value, data);
- }
- xmlFARegExecSave(exec);
- }
- if (trans->counter >= 0) {
-#ifdef DEBUG_PUSH
- printf("Increasing count %d\n", trans->counter);
-#endif
- exec->counts[trans->counter]++;
- }
-#ifdef DEBUG_PUSH
- printf("entering state %d\n", trans->to);
-#endif
- exec->state = exec->comp->states[trans->to];
- exec->transno = 0;
- if (trans->atom != NULL) {
- if (exec->inputStack != NULL) {
- exec->index++;
- if (exec->index < exec->inputStackNr) {
- value = exec->inputStack[exec->index].value;
- data = exec->inputStack[exec->index].data;
-#ifdef DEBUG_PUSH
- printf("value loaded: %s\n", value);
-#endif
- } else {
- value = NULL;
- data = NULL;
-#ifdef DEBUG_PUSH
- printf("end of input\n");
-#endif
- }
- } else {
- value = NULL;
- data = NULL;
-#ifdef DEBUG_PUSH
- printf("end of input\n");
-#endif
- }
- }
- goto progress;
- } else if (ret < 0) {
- exec->status = -4;
- break;
- }
- }
- if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
-rollback:
- /*
- * Failed to find a way out
- */
- exec->determinist = 0;
- xmlFARegExecRollBack(exec);
- if (exec->status == 0) {
- value = exec->inputStack[exec->index].value;
- data = exec->inputStack[exec->index].data;
-#ifdef DEBUG_PUSH
- printf("value loaded: %s\n", value);
-#endif
- }
- }
-progress:
- continue;
- }
- if (exec->status == 0) {
- return(exec->state->type == XML_REGEXP_FINAL_STATE);
- }
- return(exec->status);
-}
-
-#if 0
-static int
-xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
- xmlRegTransPtr trans;
- xmlRegAtomPtr atom;
- int ret;
- int codepoint, len;
-
- if (exec == NULL)
- return(-1);
- if (exec->status != 0)
- return(exec->status);
-
- while ((exec->status == 0) &&
- ((exec->inputString[exec->index] != 0) ||
- (exec->state->type != XML_REGEXP_FINAL_STATE))) {
-
- /*
- * End of input on non-terminal state, rollback, however we may
- * still have epsilon like transition for counted transitions
- * on counters, in that case don't break too early.
- */
- if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
- goto rollback;
-
- exec->transcount = 0;
- for (;exec->transno < exec->state->nbTrans;exec->transno++) {
- trans = &exec->state->trans[exec->transno];
- if (trans->to < 0)
- continue;
- atom = trans->atom;
- ret = 0;
- if (trans->count >= 0) {
- int count;
- xmlRegCounterPtr counter;
-
- /*
- * A counted transition.
- */
-
- count = exec->counts[trans->count];
- counter = &exec->comp->counters[trans->count];
-#ifdef DEBUG_REGEXP_EXEC
- printf("testing count %d: val %d, min %d, max %d\n",
- trans->count, count, counter->min, counter->max);
-#endif
- ret = ((count >= counter->min) && (count <= counter->max));
- } else if (atom == NULL) {
- fprintf(stderr, "epsilon transition left at runtime\n");
- exec->status = -2;
- break;
- } else if (exec->inputString[exec->index] != 0) {
- codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
- ret = xmlRegCheckCharacter(atom, codepoint);
- if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
- xmlRegStatePtr to = exec->comp->states[trans->to];
-
- /*
- * this is a multiple input sequence
- */
- if (exec->state->nbTrans > exec->transno + 1) {
- xmlFARegExecSave(exec);
- }
- exec->transcount = 1;
- do {
- /*
- * Try to progress as much as possible on the input
- */
- if (exec->transcount == atom->max) {
- break;
- }
- exec->index += len;
- /*
- * End of input: stop here
- */
- if (exec->inputString[exec->index] == 0) {
- exec->index -= len;
- break;
- }
- if (exec->transcount >= atom->min) {
- int transno = exec->transno;
- xmlRegStatePtr state = exec->state;
-
- /*
- * The transition is acceptable save it
- */
- exec->transno = -1; /* trick */
- exec->state = to;
- xmlFARegExecSave(exec);
- exec->transno = transno;
- exec->state = state;
- }
- codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
- len);
- ret = xmlRegCheckCharacter(atom, codepoint);
- exec->transcount++;
- } while (ret == 1);
- if (exec->transcount < atom->min)
- ret = 0;
-
- /*
- * If the last check failed but one transition was found
- * possible, rollback
- */
- if (ret < 0)
- ret = 0;
- if (ret == 0) {
- goto rollback;
- }
- }
- }
- if (ret == 1) {
- if (exec->state->nbTrans > exec->transno + 1) {
- xmlFARegExecSave(exec);
- }
- if (trans->counter >= 0) {
-#ifdef DEBUG_REGEXP_EXEC
- printf("Increasing count %d\n", trans->counter);
-#endif
- exec->counts[trans->counter]++;
- }
-#ifdef DEBUG_REGEXP_EXEC
- printf("entering state %d\n", trans->to);
-#endif
- exec->state = exec->comp->states[trans->to];
- exec->transno = 0;
- if (trans->atom != NULL) {
- exec->index += len;
- }
- goto progress;
- } else if (ret < 0) {
- exec->status = -4;
- break;
- }
- }
- if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
-rollback:
- /*
- * Failed to find a way out
- */
- exec->determinist = 0;
- xmlFARegExecRollBack(exec);
- }
-progress:
- continue;
- }
-}
-#endif
-/************************************************************************
- * *
- * Parser for the Shemas Datatype Regular Expressions *
- * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs *
- * *
- ************************************************************************/
-
-/**
- * xmlFAIsChar:
- * @ctxt: a regexp parser context
- *
- * [10] Char ::= [^.\?*+()|#x5B#x5D]
- */
-static int
-xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
- int cur;
- int len;
-
- cur = CUR_SCHAR(ctxt->cur, len);
- if ((cur == '.') || (cur == '\\') || (cur == '?') ||
- (cur == '*') || (cur == '+') || (cur == '(') ||
- (cur == ')') || (cur == '|') || (cur == 0x5B) ||
- (cur == 0x5D) || (cur == 0))
- return(-1);
- return(cur);
-}
-
-/**
- * xmlFAParseCharProp:
- * @ctxt: a regexp parser context
- *
- * [27] charProp ::= IsCategory | IsBlock
- * [28] IsCategory ::= Letters | Marks | Numbers | Punctuation |
- * Separators | Symbols | Others
- * [29] Letters ::= 'L' [ultmo]?
- * [30] Marks ::= 'M' [nce]?
- * [31] Numbers ::= 'N' [dlo]?
- * [32] Punctuation ::= 'P' [cdseifo]?
- * [33] Separators ::= 'Z' [slp]?
- * [34] Symbols ::= 'S' [mcko]?
- * [35] Others ::= 'C' [cfon]?
- * [36] IsBlock ::= 'Is' [a-zA-Z0-9#x2D]+
- */
-static void
-xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
- int cur;
- xmlRegAtomType type = 0;
- xmlChar *blockName = NULL;
-
- cur = CUR;
- if (cur == 'L') {
- NEXT;
- cur = CUR;
- if (cur == 'u') {
- NEXT;
- type = XML_REGEXP_LETTER_UPPERCASE;
- } else if (cur == 'l') {
- NEXT;
- type = XML_REGEXP_LETTER_LOWERCASE;
- } else if (cur == 't') {
- NEXT;
- type = XML_REGEXP_LETTER_TITLECASE;
- } else if (cur == 'm') {
- NEXT;
- type = XML_REGEXP_LETTER_MODIFIER;
- } else if (cur == 'o') {
- NEXT;
- type = XML_REGEXP_LETTER_OTHERS;
- } else {
- type = XML_REGEXP_LETTER;
- }
- } else if (cur == 'M') {
- NEXT;
- cur = CUR;
- if (cur == 'n') {
- NEXT;
- /* nonspacing */
- type = XML_REGEXP_MARK_NONSPACING;
- } else if (cur == 'c') {
- NEXT;
- /* spacing combining */
- type = XML_REGEXP_MARK_SPACECOMBINING;
- } else if (cur == 'e') {
- NEXT;
- /* enclosing */
- type = XML_REGEXP_MARK_ENCLOSING;
- } else {
- /* all marks */
- type = XML_REGEXP_MARK;
- }
- } else if (cur == 'N') {
- NEXT;
- cur = CUR;
- if (cur == 'd') {
- NEXT;
- /* digital */
- type = XML_REGEXP_NUMBER_DECIMAL;
- } else if (cur == 'l') {
- NEXT;
- /* letter */
- type = XML_REGEXP_NUMBER_LETTER;
- } else if (cur == 'o') {
- NEXT;
- /* other */
- type = XML_REGEXP_NUMBER_OTHERS;
- } else {
- /* all numbers */
- type = XML_REGEXP_NUMBER;
- }
- } else if (cur == 'P') {
- NEXT;
- cur = CUR;
- if (cur == 'c') {
- NEXT;
- /* connector */
- type = XML_REGEXP_PUNCT_CONNECTOR;
- } else if (cur == 'd') {
- NEXT;
- /* dash */
- type = XML_REGEXP_PUNCT_DASH;
- } else if (cur == 's') {
- NEXT;
- /* open */
- type = XML_REGEXP_PUNCT_OPEN;
- } else if (cur == 'e') {
- NEXT;
- /* close */
- type = XML_REGEXP_PUNCT_CLOSE;
- } else if (cur == 'i') {
- NEXT;
- /* initial quote */
- type = XML_REGEXP_PUNCT_INITQUOTE;
- } else if (cur == 'f') {
- NEXT;
- /* final quote */
- type = XML_REGEXP_PUNCT_FINQUOTE;
- } else if (cur == 'o') {
- NEXT;
- /* other */
- type = XML_REGEXP_PUNCT_OTHERS;
- } else {
- /* all punctuation */
- type = XML_REGEXP_PUNCT;
- }
- } else if (cur == 'Z') {
- NEXT;
- cur = CUR;
- if (cur == 's') {
- NEXT;
- /* space */
- type = XML_REGEXP_SEPAR_SPACE;
- } else if (cur == 'l') {
- NEXT;
- /* line */
- type = XML_REGEXP_SEPAR_LINE;
- } else if (cur == 'p') {
- NEXT;
- /* paragraph */
- type = XML_REGEXP_SEPAR_PARA;
- } else {
- /* all separators */
- type = XML_REGEXP_SEPAR;
- }
- } else if (cur == 'S') {
- NEXT;
- cur = CUR;
- if (cur == 'm') {
- NEXT;
- type = XML_REGEXP_SYMBOL_MATH;
- /* math */
- } else if (cur == 'c') {
- NEXT;
- type = XML_REGEXP_SYMBOL_CURRENCY;
- /* currency */
- } else if (cur == 'k') {
- NEXT;
- type = XML_REGEXP_SYMBOL_MODIFIER;
- /* modifiers */
- } else if (cur == 'o') {
- NEXT;
- type = XML_REGEXP_SYMBOL_OTHERS;
- /* other */
- } else {
- /* all symbols */
- type = XML_REGEXP_SYMBOL;
- }
- } else if (cur == 'C') {
- NEXT;
- cur = CUR;
- if (cur == 'c') {
- NEXT;
- /* control */
- type = XML_REGEXP_OTHER_CONTROL;
- } else if (cur == 'f') {
- NEXT;
- /* format */
- type = XML_REGEXP_OTHER_FORMAT;
- } else if (cur == 'o') {
- NEXT;
- /* private use */
- type = XML_REGEXP_OTHER_PRIVATE;
- } else if (cur == 'n') {
- NEXT;
- /* not assigned */
- type = XML_REGEXP_OTHER_NA;
- } else {
- /* all others */
- type = XML_REGEXP_OTHER;
- }
- } else if (cur == 'I') {
- const xmlChar *start;
- NEXT;
- cur = CUR;
- if (cur != 's') {
- ERROR("IsXXXX expected");
- return;
- }
- NEXT;
- start = ctxt->cur;
- cur = CUR;
- if (((cur >= 'a') && (cur <= 'z')) ||
- ((cur >= 'A') && (cur <= 'Z')) ||
- ((cur >= '0') && (cur <= '9')) ||
- (cur == 0x2D)) {
- NEXT;
- cur = CUR;
- while (((cur >= 'a') && (cur <= 'z')) ||
- ((cur >= 'A') && (cur <= 'Z')) ||
- ((cur >= '0') && (cur <= '9')) ||
- (cur == 0x2D)) {
- NEXT;
- cur = CUR;
- }
- }
- type = XML_REGEXP_BLOCK_NAME;
- blockName = xmlStrndup(start, ctxt->cur - start);
- } else {
- ERROR("Unknown char property");
- return;
- }
- if (ctxt->atom == NULL) {
- ctxt->atom = xmlRegNewAtom(ctxt, type);
- if (ctxt->atom != NULL)
- ctxt->atom->valuep = blockName;
- } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
- xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
- type, 0, 0, blockName);
- }
-}
-
-/**
- * xmlFAParseCharClassEsc:
- * @ctxt: a regexp parser context
- *
- * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
- * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
- * [25] catEsc ::= '\p{' charProp '}'
- * [26] complEsc ::= '\P{' charProp '}'
- * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
- */
-static void
-xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
- int cur;
-
- if (CUR == '.') {
- if (ctxt->atom == NULL) {
- ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
- } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
- xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
- XML_REGEXP_ANYCHAR, 0, 0, NULL);
- }
- NEXT;
- return;
- }
- if (CUR != '\\') {
- ERROR("Escaped sequence: expecting \\");
- return;
- }
- NEXT;
- cur = CUR;
- if (cur == 'p') {
- NEXT;
- if (CUR != '{') {
- ERROR("Expecting '{'");
- return;
- }
- NEXT;
- xmlFAParseCharProp(ctxt);
- if (CUR != '}') {
- ERROR("Expecting '}'");
- return;
- }
- NEXT;
- } else if (cur == 'P') {
- NEXT;
- if (CUR != '{') {
- ERROR("Expecting '{'");
- return;
- }
- NEXT;
- xmlFAParseCharProp(ctxt);
- ctxt->atom->neg = 1;
- if (CUR != '}') {
- ERROR("Expecting '}'");
- return;
- }
- NEXT;
- } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
- (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
- (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
- (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
- (cur == 0x5E)) {
- if (ctxt->atom == NULL) {
- ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
- if (ctxt->atom != NULL)
- ctxt->atom->codepoint = cur;
- } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
- xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
- XML_REGEXP_CHARVAL, cur, cur, NULL);
- }
- NEXT;
- } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
- (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
- (cur == 'w') || (cur == 'W')) {
- xmlRegAtomType type = XML_REGEXP_ANYSPACE;
-
- switch (cur) {
- case 's':
- type = XML_REGEXP_ANYSPACE;
- break;
- case 'S':
- type = XML_REGEXP_NOTSPACE;
- break;
- case 'i':
- type = XML_REGEXP_INITNAME;
- break;
- case 'I':
- type = XML_REGEXP_NOTINITNAME;
- break;
- case 'c':
- type = XML_REGEXP_NAMECHAR;
- break;
- case 'C':
- type = XML_REGEXP_NOTNAMECHAR;
- break;
- case 'd':
- type = XML_REGEXP_DECIMAL;
- break;
- case 'D':
- type = XML_REGEXP_NOTDECIMAL;
- break;
- case 'w':
- type = XML_REGEXP_REALCHAR;
- break;
- case 'W':
- type = XML_REGEXP_NOTREALCHAR;
- break;
- }
- NEXT;
- if (ctxt->atom == NULL) {
- ctxt->atom = xmlRegNewAtom(ctxt, type);
- } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
- xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
- type, 0, 0, NULL);
- }
- }
-}
-
-/**
- * xmlFAParseCharRef:
- * @ctxt: a regexp parser context
- *
- * [19] XmlCharRef ::= ( '&#' [0-9]+ ';' ) | (' &#x' [0-9a-fA-F]+ ';' )
- */
-static int
-xmlFAParseCharRef(xmlRegParserCtxtPtr ctxt) {
- int ret = 0, cur;
-
- if ((CUR != '&') || (NXT(1) != '#'))
- return(-1);
- NEXT;
- NEXT;
- cur = CUR;
- if (cur == 'x') {
- NEXT;
- cur = CUR;
- if (((cur >= '0') && (cur <= '9')) ||
- ((cur >= 'a') && (cur <= 'f')) ||
- ((cur >= 'A') && (cur <= 'F'))) {
- while (((cur >= '0') && (cur <= '9')) ||
- ((cur >= 'A') && (cur <= 'F'))) {
- if ((cur >= '0') && (cur <= '9'))
- ret = ret * 16 + cur - '0';
- else if ((cur >= 'a') && (cur <= 'f'))
- ret = ret * 16 + 10 + (cur - 'a');
- else
- ret = ret * 16 + 10 + (cur - 'A');
- NEXT;
- cur = CUR;
- }
- } else {
- ERROR("Char ref: expecting [0-9A-F]");
- return(-1);
- }
- } else {
- if ((cur >= '0') && (cur <= '9')) {
- while ((cur >= '0') && (cur <= '9')) {
- ret = ret * 10 + cur - '0';
- NEXT;
- cur = CUR;
- }
- } else {
- ERROR("Char ref: expecting [0-9]");
- return(-1);
- }
- }
- if (cur != ';') {
- ERROR("Char ref: expecting ';'");
- return(-1);
- } else {
- NEXT;
- }
- return(ret);
-}
-
-/**
- * xmlFAParseCharRange:
- * @ctxt: a regexp parser context
- *
- * [17] charRange ::= seRange | XmlCharRef | XmlCharIncDash
- * [18] seRange ::= charOrEsc '-' charOrEsc
- * [20] charOrEsc ::= XmlChar | SingleCharEsc
- * [21] XmlChar ::= [^\#x2D#x5B#x5D]
- * [22] XmlCharIncDash ::= [^\#x5B#x5D]
- */
-static void
-xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
- int cur;
- int start = -1;
- int end = -1;
-
- if ((CUR == '&') && (NXT(1) == '#')) {
- end = start = xmlFAParseCharRef(ctxt);
- xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
- XML_REGEXP_CHARVAL, start, end, NULL);
- return;
- }
- cur = CUR;
- if (cur == '\\') {
- NEXT;
- cur = CUR;
- switch (cur) {
- case 'n': start = 0xA; break;
- case 'r': start = 0xD; break;
- case 't': start = 0x9; break;
- case '\\': case '|': case '.': case '-': case '^': case '?':
- case '*': case '+': case '{': case '}': case '(': case ')':
- case '[': case ']':
- start = cur; break;
- default:
- ERROR("Invalid escape value");
- return;
- }
- end = start;
- } else if ((cur != 0x5B) && (cur != 0x5D)) {
- end = start = cur;
- } else {
- ERROR("Expecting a char range");
- return;
- }
- NEXT;
- if (start == '-') {
- return;
- }
- cur = CUR;
- if (cur != '-') {
- xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
- XML_REGEXP_CHARVAL, start, end, NULL);
- return;
- }
- NEXT;
- cur = CUR;
- if (cur == '\\') {
- NEXT;
- cur = CUR;
- switch (cur) {
- case 'n': end = 0xA; break;
- case 'r': end = 0xD; break;
- case 't': end = 0x9; break;
- case '\\': case '|': case '.': case '-': case '^': case '?':
- case '*': case '+': case '{': case '}': case '(': case ')':
- case '[': case ']':
- end = cur; break;
- default:
- ERROR("Invalid escape value");
- return;
- }
- } else if ((cur != 0x5B) && (cur != 0x5D)) {
- end = cur;
- } else {
- ERROR("Expecting the end of a char range");
- return;
- }
- NEXT;
- /* TODO check that the values are acceptable character ranges for XML */
- if (end < start) {
- ERROR("End of range is before start of range");
- } else {
- xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
- XML_REGEXP_CHARVAL, start, end, NULL);
- }
- return;
-}
-
-/**
- * xmlFAParsePosCharGroup:
- * @ctxt: a regexp parser context
- *
- * [14] posCharGroup ::= ( charRange | charClassEsc )+
- */
-static void
-xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
- do {
- if ((CUR == '\\') || (CUR == '.')) {
- xmlFAParseCharClassEsc(ctxt);
- } else {
- xmlFAParseCharRange(ctxt);
- }
- } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
- (ctxt->error == 0));
-}
-
-/**
- * xmlFAParseCharGroup:
- * @ctxt: a regexp parser context
- *
- * [13] charGroup ::= posCharGroup | negCharGroup | charClassSub
- * [15] negCharGroup ::= '^' posCharGroup
- * [16] charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
- * [12] charClassExpr ::= '[' charGroup ']'
- */
-static void
-xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
- int n = ctxt->neg;
- while ((CUR != ']') && (ctxt->error == 0)) {
- if (CUR == '^') {
- int neg = ctxt->neg;
-
- NEXT;
- ctxt->neg = !ctxt->neg;
- xmlFAParsePosCharGroup(ctxt);
- ctxt->neg = neg;
- } else if (CUR == '-') {
- NEXT;
- ctxt->neg = !ctxt->neg;
- if (CUR != '[') {
- ERROR("charClassExpr: '[' expected");
- break;
- }
- NEXT;
- xmlFAParseCharGroup(ctxt);
- if (CUR == ']') {
- NEXT;
- } else {
- ERROR("charClassExpr: ']' expected");
- break;
- }
- break;
- } else if (CUR != ']') {
- xmlFAParsePosCharGroup(ctxt);
- }
- }
- ctxt->neg = n;
-}
-
-/**
- * xmlFAParseCharClass:
- * @ctxt: a regexp parser context
- *
- * [11] charClass ::= charClassEsc | charClassExpr
- * [12] charClassExpr ::= '[' charGroup ']'
- */
-static void
-xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
- if (CUR == '[') {
- NEXT;
- ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
- if (ctxt->atom == NULL)
- return;
- xmlFAParseCharGroup(ctxt);
- if (CUR == ']') {
- NEXT;
- } else {
- ERROR("xmlFAParseCharClass: ']' expected");
- }
- } else {
- xmlFAParseCharClassEsc(ctxt);
- }
-}
-
-/**
- * xmlFAParseQuantExact:
- * @ctxt: a regexp parser context
- *
- * [8] QuantExact ::= [0-9]+
- */
-static int
-xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
- int ret = 0;
- int ok = 0;
-
- while ((CUR >= '0') && (CUR <= '9')) {
- ret = ret * 10 + (CUR - '0');
- ok = 1;
- NEXT;
- }
- if (ok != 1) {
- return(-1);
- }
- return(ret);
-}
-
-/**
- * xmlFAParseQuantifier:
- * @ctxt: a regexp parser context
- *
- * [4] quantifier ::= [?*+] | ( '{' quantity '}' )
- * [5] quantity ::= quantRange | quantMin | QuantExact
- * [6] quantRange ::= QuantExact ',' QuantExact
- * [7] quantMin ::= QuantExact ','
- * [8] QuantExact ::= [0-9]+
- */
-static int
-xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
- int cur;
-
- cur = CUR;
- if ((cur == '?') || (cur == '*') || (cur == '+')) {
- if (ctxt->atom != NULL) {
- if (cur == '?')
- ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
- else if (cur == '*')
- ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
- else if (cur == '+')
- ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
- }
- NEXT;
- return(1);
- }
- if (cur == '{') {
- int min = 0, max = 0;
-
- NEXT;
- cur = xmlFAParseQuantExact(ctxt);
- if (cur >= 0)
- min = cur;
- if (CUR == ',') {
- NEXT;
- cur = xmlFAParseQuantExact(ctxt);
- if (cur >= 0)
- max = cur;
- }
- if (CUR == '}') {
- NEXT;
- } else {
- ERROR("Unterminated quantifier");
- }
- if (max == 0)
- max = min;
- if (ctxt->atom != NULL) {
- ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
- ctxt->atom->min = min;
- ctxt->atom->max = max;
- }
- return(1);
- }
- return(0);
-}
-
-/**
- * xmlFAParseAtom:
- * @ctxt: a regexp parser context
- *
- * [9] atom ::= Char | charClass | ( '(' regExp ')' )
- */
-static int
-xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
- int codepoint, len;
-
- codepoint = xmlFAIsChar(ctxt);
- if (codepoint > 0) {
- ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
- if (ctxt->atom == NULL)
- return(-1);
- codepoint = CUR_SCHAR(ctxt->cur, len);
- ctxt->atom->codepoint = codepoint;
- NEXTL(len);
- return(1);
- } else if (CUR == '|') {
- return(0);
- } else if (CUR == 0) {
- return(0);
- } else if (CUR == ')') {
- return(0);
- } else if (CUR == '(') {
- xmlRegStatePtr start, oldend;
-
- NEXT;
- xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
- start = ctxt->state;
- oldend = ctxt->end;
- ctxt->end = NULL;
- ctxt->atom = NULL;
- xmlFAParseRegExp(ctxt, 0);
- if (CUR == ')') {
- NEXT;
- } else {
- ERROR("xmlFAParseAtom: expecting ')'");
- }
- ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
- if (ctxt->atom == NULL)
- return(-1);
- ctxt->atom->start = start;
- ctxt->atom->stop = ctxt->state;
- ctxt->end = oldend;
- return(1);
- } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
- xmlFAParseCharClass(ctxt);
- return(1);
- }
- return(0);
-}
-
-/**
- * xmlFAParsePiece:
- * @ctxt: a regexp parser context
- *
- * [3] piece ::= atom quantifier?
- */
-static int
-xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
- int ret;
-
- ctxt->atom = NULL;
- ret = xmlFAParseAtom(ctxt);
- if (ret == 0)
- return(0);
- if (ctxt->atom == NULL) {
- ERROR("internal: no atom generated");
- }
- xmlFAParseQuantifier(ctxt);
- return(1);
-}
-
-/**
- * xmlFAParseBranch:
- * @ctxt: a regexp parser context
- * @first: is taht the first
- *
- * [2] branch ::= piece*
- */
-static void
-xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, int first) {
- xmlRegStatePtr previous;
- xmlRegAtomPtr prevatom = NULL;
- int ret;
-
- previous = ctxt->state;
- ret = xmlFAParsePiece(ctxt);
- if (ret != 0) {
- if (first) {
- xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom);
- previous = ctxt->state;
- } else {
- prevatom = ctxt->atom;
- }
- ctxt->atom = NULL;
- }
- while ((ret != 0) && (ctxt->error == 0)) {
- ret = xmlFAParsePiece(ctxt);
- if (ret != 0) {
- if (first) {
- xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom);
- } else {
- xmlFAGenerateTransitions(ctxt, previous, NULL, prevatom);
- prevatom = ctxt->atom;
- }
- previous = ctxt->state;
- ctxt->atom = NULL;
- }
- }
- if (!first) {
- xmlFAGenerateTransitions(ctxt, previous, ctxt->end, prevatom);
- }
-}
-
-/**
- * xmlFAParseRegExp:
- * @ctxt: a regexp parser context
- * @top: is that the top-level expressions ?
- *
- * [1] regExp ::= branch ( '|' branch )*
- */
-static void
-xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
- xmlRegStatePtr start, end, oldend;
-
- oldend = ctxt->end;
-
- start = ctxt->state;
- xmlFAParseBranch(ctxt, (ctxt->end == NULL));
- if (CUR != '|') {
- ctxt->end = ctxt->state;
- return;
- }
- end = ctxt->state;
- while ((CUR == '|') && (ctxt->error == 0)) {
- NEXT;
- ctxt->state = start;
- ctxt->end = end;
- xmlFAParseBranch(ctxt, 0);
- }
- if (!top)
- ctxt->end = oldend;
-}
-
-/************************************************************************
- * *
- * The basic API *
- * *
- ************************************************************************/
-
-/**
- * xmlRegexpPrint:
- * @output: the file for the output debug
- * @regexp: the compiled regexp
- *
- * Print the content of the compiled regular expression
- */
-void
-xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
- int i;
-
- fprintf(output, " regexp: ");
- if (regexp == NULL) {
- fprintf(output, "NULL\n");
- return;
- }
- fprintf(output, "'%s' ", regexp->string);
- fprintf(output, "\n");
- fprintf(output, "%d atoms:\n", regexp->nbAtoms);
- for (i = 0;i < regexp->nbAtoms; i++) {
- fprintf(output, " %02d ", i);
- xmlRegPrintAtom(output, regexp->atoms[i]);
- }
- fprintf(output, "%d states:", regexp->nbStates);
- fprintf(output, "\n");
- for (i = 0;i < regexp->nbStates; i++) {
- xmlRegPrintState(output, regexp->states[i]);
- }
- fprintf(output, "%d counters:\n", regexp->nbCounters);
- for (i = 0;i < regexp->nbCounters; i++) {
- fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
- regexp->counters[i].max);
- }
-}
-
-/**
- * xmlRegexpCompile:
- * @regexp: a regular expression string
- *
- * Parses a regular expression conforming to XML Schemas Part 2 Datatype
- * Appendix F and build an automata suitable for testing strings against
- * that regular expression
- *
- * Returns the compiled expression or NULL in case of error
- */
-xmlRegexpPtr
-xmlRegexpCompile(const xmlChar *regexp) {
- xmlRegexpPtr ret;
- xmlRegParserCtxtPtr ctxt;
-
- ctxt = xmlRegNewParserCtxt(regexp);
- if (ctxt == NULL)
- return(NULL);
-
- /* initialize the parser */
- ctxt->end = NULL;
- ctxt->start = ctxt->state = xmlRegNewState(ctxt);
- xmlRegStatePush(ctxt, ctxt->start);
-
- /* parse the expression building an automata */
- xmlFAParseRegExp(ctxt, 1);
- if (CUR != 0) {
- ERROR("xmlFAParseRegExp: extra characters");
- }
- ctxt->end = ctxt->state;
- ctxt->start->type = XML_REGEXP_START_STATE;
- ctxt->end->type = XML_REGEXP_FINAL_STATE;
-
- /* remove the Epsilon except for counted transitions */
- xmlFAEliminateEpsilonTransitions(ctxt);
-
-
- if (ctxt->error != 0) {
- xmlRegFreeParserCtxt(ctxt);
- return(NULL);
- }
- ret = xmlRegEpxFromParse(ctxt);
- xmlRegFreeParserCtxt(ctxt);
- return(ret);
-}
-
-/**
- * xmlRegexpExec:
- * @comp: the compiled regular expression
- * @content: the value to check against the regular expression
- *
- * Check if the regular expression generate the value
- *
- * Returns 1 if it matches, 0 if not and a negativa value in case of error
- */
-int
-xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
- if ((comp == NULL) || (content == NULL))
- return(-1);
- return(xmlFARegExec(comp, content));
-}
-
-/**
- * xmlRegexpIsDeterminist:
- * @comp: the compiled regular expression
- *
- * Check if the regular expression is determinist
- *
- * Returns 1 if it yes, 0 if not and a negativa value in case of error
- */
-int
-xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
- xmlAutomataPtr am;
- int ret;
-
- if (comp == NULL)
- return(-1);
- if (comp->determinist != -1)
- return(comp->determinist);
-
- am = xmlNewAutomata();
- if (am->states != NULL) {
- int i;
-
- for (i = 0;i < am->nbStates;i++)
- xmlRegFreeState(am->states[i]);
- xmlFree(am->states);
- }
- am->nbAtoms = comp->nbAtoms;
- am->atoms = comp->atoms;
- am->nbStates = comp->nbStates;
- am->states = comp->states;
- am->determinist = -1;
- ret = xmlFAComputesDeterminism(am);
- am->atoms = NULL;
- am->states = NULL;
- xmlFreeAutomata(am);
- return(ret);
-}
-
-/**
- * xmlRegFreeRegexp:
- * @regexp: the regexp
- *
- * Free a regexp
- */
-void
-xmlRegFreeRegexp(xmlRegexpPtr regexp) {
- int i;
- if (regexp == NULL)
- return;
-
- if (regexp->string != NULL)
- xmlFree(regexp->string);
- if (regexp->states != NULL) {
- for (i = 0;i < regexp->nbStates;i++)
- xmlRegFreeState(regexp->states[i]);
- xmlFree(regexp->states);
- }
- if (regexp->atoms != NULL) {
- for (i = 0;i < regexp->nbAtoms;i++)
- xmlRegFreeAtom(regexp->atoms[i]);
- xmlFree(regexp->atoms);
- }
- if (regexp->counters != NULL)
- xmlFree(regexp->counters);
- if (regexp->compact != NULL)
- xmlFree(regexp->compact);
- if (regexp->transdata != NULL)
- xmlFree(regexp->transdata);
- if (regexp->stringMap != NULL) {
- for (i = 0; i < regexp->nbstrings;i++)
- xmlFree(regexp->stringMap[i]);
- xmlFree(regexp->stringMap);
- }
-
- xmlFree(regexp);
-}
-
-#ifdef LIBXML_AUTOMATA_ENABLED
-/************************************************************************
- * *
- * The Automata interface *
- * *
- ************************************************************************/
-
-/**
- * xmlNewAutomata:
- *
- * Create a new automata
- *
- * Returns the new object or NULL in case of failure
- */
-xmlAutomataPtr
-xmlNewAutomata(void) {
- xmlAutomataPtr ctxt;
-
- ctxt = xmlRegNewParserCtxt(NULL);
- if (ctxt == NULL)
- return(NULL);
-
- /* initialize the parser */
- ctxt->end = NULL;
- ctxt->start = ctxt->state = xmlRegNewState(ctxt);
- xmlRegStatePush(ctxt, ctxt->start);
-
- return(ctxt);
-}
-
-/**
- * xmlFreeAutomata:
- * @am: an automata
- *
- * Free an automata
- */
-void
-xmlFreeAutomata(xmlAutomataPtr am) {
- if (am == NULL)
- return;
- xmlRegFreeParserCtxt(am);
-}
-
-/**
- * xmlAutomataGetInitState:
- * @am: an automata
- *
- * Initial state lookup
- *
- * Returns the initial state of the automata
- */
-xmlAutomataStatePtr
-xmlAutomataGetInitState(xmlAutomataPtr am) {
- if (am == NULL)
- return(NULL);
- return(am->start);
-}
-
-/**
- * xmlAutomataSetFinalState:
- * @am: an automata
- * @state: a state in this automata
- *
- * Makes that state a final state
- *
- * Returns 0 or -1 in case of error
- */
-int
-xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
- if ((am == NULL) || (state == NULL))
- return(-1);
- state->type = XML_REGEXP_FINAL_STATE;
- return(0);
-}
-
-/**
- * xmlAutomataNewTransition:
- * @am: an automata
- * @from: the starting point of the transition
- * @to: the target point of the transition or NULL
- * @token: the input string associated to that transition
- * @data: data passed to the callback function if the transition is activated
- *
- * If @to is NULL, this create first a new target state in the automata
- * and then adds a transition from the @from state to the target state
- * activated by the value of @token
- *
- * Returns the target state or NULL in case of error
- */
-xmlAutomataStatePtr
-xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
- xmlAutomataStatePtr to, const xmlChar *token,
- void *data) {
- xmlRegAtomPtr atom;
-
- if ((am == NULL) || (from == NULL) || (token == NULL))
- return(NULL);
- atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
- atom->data = data;
- if (atom == NULL)
- return(NULL);
- atom->valuep = xmlStrdup(token);
-
- xmlFAGenerateTransitions(am, from, to, atom);
- if (to == NULL)
- return(am->state);
- return(to);
-}
-
-/**
- * xmlAutomataNewCountTrans:
- * @am: an automata
- * @from: the starting point of the transition
- * @to: the target point of the transition or NULL
- * @token: the input string associated to that transition
- * @min: the minimum successive occurences of token
- * @max: the maximum successive occurences of token
- * @data: data associated to the transition
- *
- * If @to is NULL, this create first a new target state in the automata
- * and then adds a transition from the @from state to the target state
- * activated by a succession of input of value @token and whose number
- * is between @min and @max
- *
- * Returns the target state or NULL in case of error
- */
-xmlAutomataStatePtr
-xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
- xmlAutomataStatePtr to, const xmlChar *token,
- int min, int max, void *data) {
- xmlRegAtomPtr atom;
-
- if ((am == NULL) || (from == NULL) || (token == NULL))
- return(NULL);
- if (min < 0)
- return(NULL);
- if ((max < min) || (max < 1))
- return(NULL);
- atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
- if (atom == NULL)
- return(NULL);
- atom->valuep = xmlStrdup(token);
- atom->data = data;
- if (min == 0)
- atom->min = 1;
- else
- atom->min = min;
- atom->max = max;
-
- xmlFAGenerateTransitions(am, from, to, atom);
- if (to == NULL)
- to = am->state;
- if (to == NULL)
- return(NULL);
- if (min == 0)
- xmlFAGenerateEpsilonTransition(am, from, to);
- return(to);
-}
-
-/**
- * xmlAutomataNewOnceTrans:
- * @am: an automata
- * @from: the starting point of the transition
- * @to: the target point of the transition or NULL
- * @token: the input string associated to that transition
- * @min: the minimum successive occurences of token
- * @max: the maximum successive occurences of token
- * @data: data associated to the transition
- *
- * If @to is NULL, this create first a new target state in the automata
- * and then adds a transition from the @from state to the target state
- * activated by a succession of input of value @token and whose number
- * is between @min and @max, moreover that transistion can only be crossed
- * once.
- *
- * Returns the target state or NULL in case of error
- */
-xmlAutomataStatePtr
-xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
- xmlAutomataStatePtr to, const xmlChar *token,
- int min, int max, void *data) {
- xmlRegAtomPtr atom;
- int counter;
-
- if ((am == NULL) || (from == NULL) || (token == NULL))
- return(NULL);
- if (min < 1)
- return(NULL);
- if ((max < min) || (max < 1))
- return(NULL);
- atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
- if (atom == NULL)
- return(NULL);
- atom->valuep = xmlStrdup(token);
- atom->data = data;
- atom->quant = XML_REGEXP_QUANT_ONCEONLY;
- if (min == 0)
- atom->min = 1;
- else
- atom->min = min;
- atom->max = max;
- /*
- * associate a counter to the transition.
- */
- counter = xmlRegGetCounter(am);
- am->counters[counter].min = 1;
- am->counters[counter].max = 1;
-
- /* xmlFAGenerateTransitions(am, from, to, atom); */
- if (to == NULL) {
- to = xmlRegNewState(am);
- xmlRegStatePush(am, to);
- }
- xmlRegStateAddTrans(am, from, atom, to, counter, -1);
- xmlRegAtomPush(am, atom);
- am->state = to;
- if (to == NULL)
- to = am->state;
- if (to == NULL)
- return(NULL);
- return(to);
-}
-
-/**
- * xmlAutomataNewState:
- * @am: an automata
- *
- * Create a new disconnected state in the automata
- *
- * Returns the new state or NULL in case of error
- */
-xmlAutomataStatePtr
-xmlAutomataNewState(xmlAutomataPtr am) {
- xmlAutomataStatePtr to;
-
- if (am == NULL)
- return(NULL);
- to = xmlRegNewState(am);
- xmlRegStatePush(am, to);
- return(to);
-}
-
-/**
- * xmlAutomataNewEpsilon:
- * @am: an automata
- * @from: the starting point of the transition
- * @to: the target point of the transition or NULL
- *
- * If @to is NULL, this create first a new target state in the automata
- * and then adds a an epsilon transition from the @from state to the
- * target state
- *
- * Returns the target state or NULL in case of error
- */
-xmlAutomataStatePtr
-xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
- xmlAutomataStatePtr to) {
- if ((am == NULL) || (from == NULL))
- return(NULL);
- xmlFAGenerateEpsilonTransition(am, from, to);
- if (to == NULL)
- return(am->state);
- return(to);
-}
-
-/**
- * xmlAutomataNewAllTrans:
- * @am: an automata
- * @from: the starting point of the transition
- * @to: the target point of the transition or NULL
- * @lax: allow to transition if not all all transitions have been activated
- *
- * If @to is NULL, this create first a new target state in the automata
- * and then adds a an ALL transition from the @from state to the
- * target state. That transition is an epsilon transition allowed only when
- * all transitions from the @from node have been activated.
- *
- * Returns the target state or NULL in case of error
- */
-xmlAutomataStatePtr
-xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
- xmlAutomataStatePtr to, int lax) {
- if ((am == NULL) || (from == NULL))
- return(NULL);
- xmlFAGenerateAllTransition(am, from, to, lax);
- if (to == NULL)
- return(am->state);
- return(to);
-}
-
-/**
- * xmlAutomataNewCounter:
- * @am: an automata
- * @min: the minimal value on the counter
- * @max: the maximal value on the counter
- *
- * Create a new counter
- *
- * Returns the counter number or -1 in case of error
- */
-int
-xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
- int ret;
-
- if (am == NULL)
- return(-1);
-
- ret = xmlRegGetCounter(am);
- if (ret < 0)
- return(-1);
- am->counters[ret].min = min;
- am->counters[ret].max = max;
- return(ret);
-}
-
-/**
- * xmlAutomataNewCountedTrans:
- * @am: an automata
- * @from: the starting point of the transition
- * @to: the target point of the transition or NULL
- * @counter: the counter associated to that transition
- *
- * If @to is NULL, this create first a new target state in the automata
- * and then adds an epsilon transition from the @from state to the target state
- * which will increment the counter provided
- *
- * Returns the target state or NULL in case of error
- */
-xmlAutomataStatePtr
-xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
- xmlAutomataStatePtr to, int counter) {
- if ((am == NULL) || (from == NULL) || (counter < 0))
- return(NULL);
- xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
- if (to == NULL)
- return(am->state);
- return(to);
-}
-
-/**
- * xmlAutomataNewCounterTrans:
- * @am: an automata
- * @from: the starting point of the transition
- * @to: the target point of the transition or NULL
- * @counter: the counter associated to that transition
- *
- * If @to is NULL, this create first a new target state in the automata
- * and then adds an epsilon transition from the @from state to the target state
- * which will be allowed only if the counter is within the right range.
- *
- * Returns the target state or NULL in case of error
- */
-xmlAutomataStatePtr
-xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
- xmlAutomataStatePtr to, int counter) {
- if ((am == NULL) || (from == NULL) || (counter < 0))
- return(NULL);
- xmlFAGenerateCountedTransition(am, from, to, counter);
- if (to == NULL)
- return(am->state);
- return(to);
-}
-
-/**
- * xmlAutomataCompile:
- * @am: an automata
- *
- * Compile the automata into a Reg Exp ready for being executed.
- * The automata should be free after this point.
- *
- * Returns the compiled regexp or NULL in case of error
- */
-xmlRegexpPtr
-xmlAutomataCompile(xmlAutomataPtr am) {
- xmlRegexpPtr ret;
-
- xmlFAEliminateEpsilonTransitions(am);
- /* xmlFAComputesDeterminism(am); */
- ret = xmlRegEpxFromParse(am);
-
- return(ret);
-}
-
-/**
- * xmlAutomataIsDeterminist:
- * @am: an automata
- *
- * Checks if an automata is determinist.
- *
- * Returns 1 if true, 0 if not, and -1 in case of error
- */
-int
-xmlAutomataIsDeterminist(xmlAutomataPtr am) {
- int ret;
-
- if (am == NULL)
- return(-1);
-
- ret = xmlFAComputesDeterminism(am);
- return(ret);
-}
-#endif /* LIBXML_AUTOMATA_ENABLED */
-#endif /* LIBXML_REGEXP_ENABLED */