summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@colm.net>2020-03-22 15:22:06 +0200
committerAdrian Thurston <thurston@colm.net>2020-03-22 15:22:06 +0200
commitd8296ea1c38c13bbdb67adc94c2af2fa67f57dbd (patch)
treebf45c3b23fb8ab178657afc50508b363834862d2
parent351a7945df73ce5e44e2c9eaaa048f6e59d907b1 (diff)
downloadcolm-d8296ea1c38c13bbdb67adc94c2af2fa67f57dbd.tar.gz
trimming down parsedata.h and parsetree.h
These files belong in ragel, however there are some structs we need to keep here. Trim these files down to the bare so we can tease out what we need.
-rw-r--r--libfsm/action.h3
-rw-r--r--libfsm/fsmbase.cc1
-rw-r--r--libfsm/fsmgraph.h1
-rw-r--r--libfsm/gendata.cc1
-rw-r--r--libfsm/idbase.cc3
-rw-r--r--libfsm/parsedata.h416
-rw-r--r--libfsm/parsetree.h873
7 files changed, 121 insertions, 1177 deletions
diff --git a/libfsm/action.h b/libfsm/action.h
index 39169202..5aea6f7f 100644
--- a/libfsm/action.h
+++ b/libfsm/action.h
@@ -29,6 +29,9 @@ struct NameInst;
struct NameRef;
struct LongestMatch;
struct InlineList;
+struct LongestMatchPart;
+struct Action;
+struct CondSpace;
/*
* Inline code tree
diff --git a/libfsm/fsmbase.cc b/libfsm/fsmbase.cc
index bdf40279..cc2c8757 100644
--- a/libfsm/fsmbase.cc
+++ b/libfsm/fsmbase.cc
@@ -22,6 +22,7 @@
#include "fsmgraph.h"
#include "parsedata.h"
+#include "action.h"
#include <string.h>
#include <assert.h>
diff --git a/libfsm/fsmgraph.h b/libfsm/fsmgraph.h
index 2429d923..4af13101 100644
--- a/libfsm/fsmgraph.h
+++ b/libfsm/fsmgraph.h
@@ -61,7 +61,6 @@ struct StateAp;
struct FsmAp;
struct Action;
struct LongestMatchPart;
-struct LengthDef;
struct CondSpace;
struct FsmCtx;
struct InlineBlock;
diff --git a/libfsm/gendata.cc b/libfsm/gendata.cc
index 2a518e7a..f0be11fa 100644
--- a/libfsm/gendata.cc
+++ b/libfsm/gendata.cc
@@ -24,6 +24,7 @@
#include "ragel.h"
#include "parsedata.h"
#include "fsmgraph.h"
+#include "action.h"
#include <string.h>
#include <iostream>
diff --git a/libfsm/idbase.cc b/libfsm/idbase.cc
index c4daa344..21eca5c3 100644
--- a/libfsm/idbase.cc
+++ b/libfsm/idbase.cc
@@ -20,10 +20,10 @@
* SOFTWARE.
*/
-
#include "ragel.h"
#include "fsmgraph.h"
#include "parsedata.h"
+#include "action.h"
/* Error reporting format. */
ErrorFormat errorFormat = ErrorFormatGNU;
@@ -124,7 +124,6 @@ void FsmCtx::analyzeAction( Action *action, InlineList *inlineList )
}
}
-
/* Check actions for bad uses of fsm directives. We don't go inside longest
* match items in actions created by ragel, since we just want the user
* actions. */
diff --git a/libfsm/parsedata.h b/libfsm/parsedata.h
index d45de5a6..5fe21af8 100644
--- a/libfsm/parsedata.h
+++ b/libfsm/parsedata.h
@@ -25,96 +25,149 @@
#include <iostream>
#include <limits.h>
-#include <sstream>
-#include <vector>
-#include <set>
#include "avlmap.h"
#include "bstmap.h"
-#include "vector.h"
#include "dlist.h"
#include "fsmgraph.h"
#include "compare.h"
-#include "vector.h"
#include "common.h"
-#include "parsetree.h"
-#include "action.h"
-
-/* Forwards. */
-using std::ostream;
+#include "ragel.h"
+#include "avlmap.h"
+#include "bstmap.h"
+#include "vector.h"
+#include "dlist.h"
+#include "fsmgraph.h"
-struct VarDef;
+struct NameInst;
struct Join;
-struct Expression;
-struct Term;
-struct FactorWithAug;
-struct FactorWithLabel;
-struct FactorWithRep;
-struct FactorWithNeg;
-struct Factor;
-struct Literal;
-struct Range;
-struct RegExpr;
-struct ReItem;
-struct ReOrBlock;
-struct ReOrItem;
+struct ParseData;
+
+/* Tree nodes. */
+
+struct NfaUnion;
+struct MachineDef;
struct LongestMatch;
-struct CodeGenData;
-struct InputData;
-struct InputItem;
+struct LongestMatchPart;
+struct LmPartList;
+struct Range;
+struct LengthDef;
+struct Action;
+struct InlineList;
-typedef DList<LongestMatch> LmList;
+/* Reference to a named state. */
+struct NameRef : public Vector<std::string> {};
+typedef Vector<NameRef*> NameRefList;
+typedef Vector<NameInst*> NameTargList;
-/* This is used for tracking the include files/machine pairs. */
-struct IncludeHistoryItem
+/*
+ * LongestMatch
+ *
+ * Wherever possible the item match will execute on the character. If not
+ * possible the item match will execute on a lookahead character and either
+ * hold the current char (if one away) or backup.
+ *
+ * How to handle the problem of backing up over a buffer break?
+ *
+ * Don't want to use pending out transitions for embedding item match because
+ * the role of item match action is different: it may sometimes match on the
+ * final transition, or may match on a lookahead character.
+ *
+ * Don't want to invent a new operator just for this. So just trail action
+ * after machine, this means we can only use literal actions.
+ *
+ * The item action may
+ *
+ * What states of the machine will be final. The item actions that wrap around
+ * on the last character will go straight to the start state.
+ *
+ * Some transitions will be lookahead transitions, they will hold the current
+ * character. Crossing them with regular transitions must be restricted
+ * because it does not make sense. The transition cannot simultaneously hold
+ * and consume the current character.
+ */
+struct LongestMatchPart
{
- IncludeHistoryItem( const char *fileName, const char *sectionName )
- : fileName(fileName), sectionName(sectionName) {}
+ LongestMatchPart( Join *join, Action *action,
+ const InputLoc &semiLoc, int longestMatchId )
+ :
+ join(join), action(action), semiLoc(semiLoc),
+ longestMatchId(longestMatchId), inLmSelect(false) { }
- std::string fileName;
- std::string sectionName;
+ InputLoc getLoc();
+
+ Join *join;
+ Action *action;
+ InputLoc semiLoc;
+
+ Action *setActId;
+ Action *actOnLast;
+ Action *actOnNext;
+ Action *actLagBehind;
+ Action *actNfaOnLast;
+ Action *actNfaOnNext;
+ Action *actNfaOnEof;
+ int longestMatchId;
+ bool inLmSelect;
+ LongestMatch *longestMatch;
+
+ LongestMatchPart *prev, *next;
};
-typedef std::vector<IncludeHistoryItem> IncludeHistory;
+/* Declare a new type so that ptreetypes.h need not include dlist.h. */
+struct LmPartList
+ : DList<LongestMatchPart> {};
-/* Graph dictionary. */
-struct GraphDictEl
-:
- public AvlTreeEl<GraphDictEl>,
- public DListEl<GraphDictEl>
+struct LongestMatch
{
- GraphDictEl( std::string k )
- : key(k), value(0), isInstance(false) { }
- GraphDictEl( std::string k, VarDef *value )
- : key(k), value(value), isInstance(false) { }
-
- ~GraphDictEl()
- {
- delete value;
- }
-
- std::string getKey() { return key; }
+ /* Construct with a list of joins */
+ LongestMatch( const InputLoc &loc, LmPartList *longestMatchList )
+ :
+ loc(loc),
+ longestMatchList(longestMatchList),
+ lmSwitchHandlesError(false),
+ nfaConstruction(false)
+ { }
- std::string key;
- VarDef *value;
- bool isInstance;
-
- /* Location info of graph definition. Points to variable name of assignment. */
InputLoc loc;
+ LmPartList *longestMatchList;
+ std::string name;
+ Action *lmActSelect;
+ bool lmSwitchHandlesError;
+ bool nfaConstruction;
+
+ LongestMatch *next, *prev;
+
+ /* Tree traversal. */
+ FsmRes walkClassic( ParseData *pd );
+ FsmRes walk( ParseData *pd );
+
+ FsmRes mergeNfaStates( ParseData *pd, FsmAp *fsm );
+ bool onlyOneNfa( ParseData *pd, FsmAp *fsm, StateAp *st, NfaTrans *in );
+ bool matchCanFail( ParseData *pd, FsmAp *fsm, StateAp *st );
+ void eliminateNfaActions( ParseData *pd, FsmAp *fsm );
+ void advanceNfaActions( ParseData *pd, FsmAp *fsm );
+ FsmRes buildBaseNfa( ParseData *pd );
+ FsmRes walkNfa( ParseData *pd );
+
+ void makeNameTree( ParseData *pd );
+ void resolveNameRefs( ParseData *pd );
+ void transferScannerLeavingActions( FsmAp *graph );
+ void runLongestMatch( ParseData *pd, FsmAp *graph );
+ Action *newLmAction( ParseData *pd, const InputLoc &loc, const char *name,
+ InlineList *inlineList );
+ void makeActions( ParseData *pd );
+ void findName( ParseData *pd );
+ void restart( FsmAp *graph, TransAp *trans );
+ void restart( FsmAp *graph, CondAp *cond );
};
-typedef AvlTree<GraphDictEl, std::string, CmpString> GraphDict;
-typedef DList<GraphDictEl> GraphList;
/* Priority name dictionary. */
typedef AvlMapEl<std::string, int> PriorDictEl;
typedef AvlMap<std::string, int, CmpString> PriorDict;
-/* Local error name dictionary. */
-typedef AvlMapEl<std::string, int> LocalErrDictEl;
-typedef AvlMap<std::string, int, CmpString> LocalErrDict;
-
struct NameMapVal
{
Vector<NameInst*> vals;
@@ -124,7 +177,6 @@ struct NameMapVal
typedef AvlMapEl<std::string, NameMapVal*> NameMapEl;
typedef AvlMap<std::string, NameMapVal*, CmpString> NameMap;
typedef Vector<NameInst*> NameVect;
-typedef BstSet<NameInst*> NameSet;
/* Node in the tree of instantiated names. */
struct NameInst
@@ -182,248 +234,10 @@ struct NameFrame
NameInst *prevLocalScope;
};
-struct LengthDef
-{
- LengthDef( char *name )
- : name(name) {}
-
- char *name;
- LengthDef *prev, *next;
-};
-
-typedef DList<LengthDef> LengthDefList;
-
extern const int ORD_PUSH;
extern const int ORD_RESTORE;
extern const int ORD_COND;
extern const int ORD_COND2;
extern const int ORD_TEST;
-/* Class to collect information about the machine during the
- * parse of input. */
-struct ParseData
-{
- /* Create a new parse data object. This is done at the beginning of every
- * fsm specification. */
- ParseData( InputData *id, std::string sectionName,
- int machineId, const InputLoc &sectionLoc, const HostLang *hostLang,
- MinimizeLevel minimizeLevel, MinimizeOpt minimizeOpt );
- ~ParseData();
-
- /*
- * Setting up the graph dict.
- */
-
- /* Initialize a graph dict with the basic fsms. */
- void initGraphDict();
- void createBuiltin( const char *name, BuiltinMachine builtin );
-
- /* Make a name id in the current name instantiation scope if it is not
- * already there. */
- NameInst *addNameInst( const InputLoc &loc, std::string data, bool isLabel );
- void makeRootNames();
- void makeNameTree( GraphDictEl *gdNode );
- void makeExportsNameTree();
- void fillNameIndex( NameInst *from );
-
- /* Increments the usage count on entry names. Names that are no longer
- * needed will have their entry points unset. */
- void unsetObsoleteEntries( FsmAp *graph );
-
- /* Resove name references in action code and epsilon transitions. */
- NameSet resolvePart( NameInst *refFrom, const std::string &data, bool recLabelsOnly );
- void resolveFrom( NameSet &result, NameInst *refFrom,
- NameRef *nameRef, int namePos );
- NameInst *resolveStateRef( NameRef *nameRef, InputLoc &loc, Action *action );
- void resolveNameRefs( InlineList *inlineList, Action *action );
- void resolveActionNameRefs();
-
- /* Set the alphabet type. If type types are not valid returns false. */
- bool setAlphType( const InputLoc &loc, const HostLang *hostLang,
- const char *s1 );
- bool setAlphType( const InputLoc &loc, const HostLang *hostLang,
- const char *s1, const char *s2 );
-
- /* Override one of the variables ragel uses. */
- bool setVariable( const char *var, InlineList *inlineList );
-
- /* Dumping the name instantiation tree. */
- void printNameInst( std::ostream &out, NameInst *nameInst, int level );
- void printNameTree( std::ostream &out );
-
- void analysisResult( long code, long id, const char *scode );
-
- void reportBreadthResults( BreadthResult *breadth );
- BreadthResult *checkBreadth( FsmAp *fsm );
- void reportAnalysisResult( FsmRes &res );
-
- /* Make the graph from a graph dict node. Does minimization. */
- FsmRes makeInstance( GraphDictEl *gdNode );
- FsmRes makeSpecific( GraphDictEl *gdNode );
- FsmRes makeAll();
-
- void makeExports();
-
- FsmRes prepareMachineGen( GraphDictEl *graphDictEl, const HostLang *hostLang );
- void generateXML( ostream &out );
- void generateReduced( const char *inputFileName, CodeStyle codeStyle,
- std::ostream &out, const HostLang *hostLang );
-
- std::string sectionName;
- FsmAp *sectionGraph;
-
- void initKeyOps( const HostLang *hostLang );
-
- void errorStateLabels( const NameSet &resolved );
-
- /*
- * Data collected during the parse.
- */
-
- /* Dictionary of graphs. Both instances and non-instances go here. */
- GraphDict graphDict;
-
- /* The list of instances. */
- GraphList instanceList;
-
- /* Dictionary of actions. Lets actions be defined and then referenced. */
- ActionDict actionDict;
-
- /* Dictionary of named priorities. */
- PriorDict priorDict;
-
- /* Dictionary of named local errors. */
- LocalErrDict localErrDict;
-
- /* Various next identifiers. */
- int nextLocalErrKey, nextNameId;
-
- /* The default priority number key for a machine. This is active during
- * the parse of the rhs of a machine assignment. */
- int curDefPriorKey;
-
- int curDefLocalErrKey;
-
- /* Alphabet type. */
- HostType *alphType;
- HostType *userAlphType;
- bool alphTypeSet;
- InputLoc alphTypeLoc;
-
- /* The alphabet range. */
- char *lowerNum, *upperNum;
- Key lowKey, highKey;
- InputLoc rangeLowLoc, rangeHighLoc;
-
- InputData *id;
-
- /* The name of the file the fsm is from, and the spec name. */
- int machineId;
- InputLoc sectionLoc;
-
- /* Root of the name tree. One root is for the instantiated machines. The
- * other root is for exported definitions. */
- NameInst *rootName;
- NameInst *exportsRootName;
-
- /* Name tree walking. */
- NameInst *curNameInst;
- int curNameChild;
-
- /* The place where resolved epsilon transitions go. These cannot go into
- * the parse tree because a single epsilon op can resolve more than once
- * to different nameInsts if the machine it's in is used more than once. */
- NameVect epsilonResolvedLinks;
- int nextEpsilonResolvedLink;
-
- /* Root of the name tree used for doing local name searches. */
- NameInst *localNameScope;
-
- void setLmInRetLoc( InlineList *inlineList );
- void initLongestMatchData();
- void longestMatchInitTweaks( FsmAp *graph );
- void initNameWalk();
- void initExportsNameWalk();
- NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; }
- NameFrame enterNameScope( bool isLocal, int numScopes );
- void popNameScope( const NameFrame &frame );
- void resetNameScope( const NameFrame &frame );
-
- void nfaTermCheckKleeneZero();
- void nfaTermCheckMinZero();
- void nfaTermCheckPlusZero();
- void nfaTermCheckRepZero();
- void nfaTermCheckZeroReps();
-
- void clear();
-
- /* Counter for assigning ids to longest match items. */
- int nextLongestMatchId;
-
- int nextRepId;
-
- /* List of all longest match parse tree items. */
- LmList lmList;
-
- Action *newLmCommonAction( const char *name, InlineList *inlineList );
-
- Action *initTokStart;
- int initTokStartOrd;
-
- Action *setTokStart;
- int setTokStartOrd;
-
- Action *initActId;
- int initActIdOrd;
-
- Action *setTokEnd;
- int setTokEndOrd;
-
- LengthDefList lengthDefList;
-
- CodeGenData *cgd;
-
- struct Cut
- {
- Cut( std::string name, int entryId )
- : name(name), entryId(entryId) {}
-
- std::string name;
- int entryId;
- };
-
- /* Track the cuts we set in the fsm graph. We perform cost analysis on the
- * built fsm graph for each of these entry points. */
- Vector<Cut> cuts;
-
- ParseData *prev, *next;
-
- FsmCtx *fsmCtx;
-
- /* Make a list of places to look for an included file. */
- bool duplicateInclude( const char *inclFileName, const char *inclSectionName );
-
- IncludeHistory includeHistory;
-
- std::set<std::string> actionParams;
-};
-
-Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd );
-Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd );
-Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd );
-Key makeFsmKeyChar( char c, ParseData *pd );
-void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd );
-void makeFsmUniqueKeyArray( KeySet &result, const char *data, int len,
- bool caseInsensitive, ParseData *pd );
-FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd );
-FsmAp *dotFsm( ParseData *pd );
-FsmAp *dotStarFsm( ParseData *pd );
-
-Key *prepareHexString( ParseData *pd, const InputLoc &loc,
- const char *data, long length, long &resLen );
-char *prepareLitString( InputData *id, const InputLoc &loc, const char *data, long length,
- long &resLen, bool &caseInsensitive );
-const char *checkLitOptions( InputData *id, const InputLoc &loc,
- const char *data, int length, bool &caseInsensitive );
-
#endif
diff --git a/libfsm/parsetree.h b/libfsm/parsetree.h
deleted file mode 100644
index 1d4f7e6b..00000000
--- a/libfsm/parsetree.h
+++ /dev/null
@@ -1,873 +0,0 @@
-/*
- * Copyright 2001-2018 Adrian Thurston <thurston@colm.net>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _PARSETREE_H
-#define _PARSETREE_H
-
-#include "ragel.h"
-#include "avlmap.h"
-#include "bstmap.h"
-#include "vector.h"
-#include "dlist.h"
-#include "fsmgraph.h"
-
-struct NameInst;
-
-/* Types of builtin machines. */
-enum BuiltinMachine
-{
- BT_Any,
- BT_Ascii,
- BT_Extend,
- BT_Alpha,
- BT_Digit,
- BT_Alnum,
- BT_Lower,
- BT_Upper,
- BT_Cntrl,
- BT_Graph,
- BT_Print,
- BT_Punct,
- BT_Space,
- BT_Xdigit,
- BT_Lambda,
- BT_Empty
-};
-
-
-struct ParseData;
-
-/* Leaf type. */
-struct Literal;
-
-/* Tree nodes. */
-
-struct Term;
-struct FactorWithAug;
-struct FactorWithRep;
-struct FactorWithNeg;
-struct Factor;
-struct Expression;
-struct Join;
-struct NfaUnion;
-struct MachineDef;
-struct LongestMatch;
-struct LongestMatchPart;
-struct LmPartList;
-struct Range;
-struct LengthDef;
-struct colm_data;
-struct colm_location;
-
-/* Type of augmentation. Describes locations in the machine. */
-enum AugType
-{
- /* Transition actions/priorities. */
- at_start,
- at_all,
- at_finish,
- at_leave,
-
- /* Global error actions. */
- at_start_gbl_error,
- at_all_gbl_error,
- at_final_gbl_error,
- at_not_start_gbl_error,
- at_not_final_gbl_error,
- at_middle_gbl_error,
-
- /* Local error actions. */
- at_start_local_error,
- at_all_local_error,
- at_final_local_error,
- at_not_start_local_error,
- at_not_final_local_error,
- at_middle_local_error,
-
- /* To State Action embedding. */
- at_start_to_state,
- at_all_to_state,
- at_final_to_state,
- at_not_start_to_state,
- at_not_final_to_state,
- at_middle_to_state,
-
- /* From State Action embedding. */
- at_start_from_state,
- at_all_from_state,
- at_final_from_state,
- at_not_start_from_state,
- at_not_final_from_state,
- at_middle_from_state,
-
- /* EOF Action embedding. */
- at_start_eof,
- at_all_eof,
- at_final_eof,
- at_not_start_eof,
- at_not_final_eof,
- at_middle_eof
-};
-
-/* IMPORTANT: These must follow the same order as the state augs in AugType
- * since we will be using this to compose AugType. */
-enum StateAugType
-{
- sat_start = 0,
- sat_all,
- sat_final,
- sat_not_start,
- sat_not_final,
- sat_middle
-};
-
-struct Action;
-struct PriorDesc;
-struct RegExpr;
-struct ReItem;
-struct ReOrBlock;
-struct ReOrItem;
-struct ExplicitMachine;
-struct InlineItem;
-struct InlineList;
-
-/* Reference to a named state. */
-struct NameRef : public Vector<std::string> {};
-typedef Vector<NameRef*> NameRefList;
-typedef Vector<NameInst*> NameTargList;
-
-/* Structure for storing location of epsilon transitons. */
-struct EpsilonLink
-{
- EpsilonLink( const InputLoc &loc, NameRef *target )
- : loc(loc), target(target) { }
-
- InputLoc loc;
- NameRef *target;
-};
-
-struct Label
-{
- Label( const InputLoc &loc, std::string data )
- : loc(loc), data(data), cut(false) { }
-
- InputLoc loc;
- std::string data;
- bool cut;
-};
-
-/* Structrue represents an action assigned to some FactorWithAug node. The
- * factor with aug will keep an array of these. */
-struct ParserAction
-{
- ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
- : loc(loc), type(type), localErrKey(localErrKey), action(action) { }
-
- InputLoc loc;
- AugType type;
- int localErrKey;
- Action *action;
-};
-
-struct ConditionTest
-{
- ConditionTest( const InputLoc &loc, AugType type, Action *action, bool sense ) :
- loc(loc), type(type), action(action), sense(sense) { }
-
- InputLoc loc;
- AugType type;
- Action *action;
- bool sense;
-};
-
-struct Token
-{
- char *data;
- int length;
- ParserLoc loc;
-
- void set( const char *str, int len, colm_location *cl);
- void set( colm_data *cd, colm_location *cl);
- void set( const char *str, int len, const InputLoc &loc );
- void set( const char *str, int len, const ParserLoc &loc );
-
-private:
- void _set( const char *str, int len );
-};
-
-
-struct RedToken
-{
- const char *data;
- int length;
- ParserLoc loc;
-
- void set( colm_data *cd, colm_location *cl);
-};
-
-
-/* Store the value and type of a priority augmentation. */
-struct PriorityAug
-{
- PriorityAug( AugType type, int priorKey, int priorValue ) :
- type(type), priorKey(priorKey), priorValue(priorValue) { }
-
- AugType type;
- int priorKey;
- int priorValue;
-};
-
-/*
- * A Variable Definition
- */
-struct VarDef
-{
- VarDef( std::string name, MachineDef *machineDef )
- : name(name), machineDef(machineDef), isExport(false) { }
-
- ~VarDef();
-
- /* Parse tree traversal. */
- FsmRes walk( ParseData *pd );
- void makeNameTree( const InputLoc &loc, ParseData *pd );
- void resolveNameRefs( ParseData *pd );
-
- std::string name;
- MachineDef *machineDef;
- bool isExport;
-};
-
-
-/*
- * LongestMatch
- *
- * Wherever possible the item match will execute on the character. If not
- * possible the item match will execute on a lookahead character and either
- * hold the current char (if one away) or backup.
- *
- * How to handle the problem of backing up over a buffer break?
- *
- * Don't want to use pending out transitions for embedding item match because
- * the role of item match action is different: it may sometimes match on the
- * final transition, or may match on a lookahead character.
- *
- * Don't want to invent a new operator just for this. So just trail action
- * after machine, this means we can only use literal actions.
- *
- * The item action may
- *
- * What states of the machine will be final. The item actions that wrap around
- * on the last character will go straight to the start state.
- *
- * Some transitions will be lookahead transitions, they will hold the current
- * character. Crossing them with regular transitions must be restricted
- * because it does not make sense. The transition cannot simultaneously hold
- * and consume the current character.
- */
-struct LongestMatchPart
-{
- LongestMatchPart( Join *join, Action *action,
- const InputLoc &semiLoc, int longestMatchId )
- :
- join(join), action(action), semiLoc(semiLoc),
- longestMatchId(longestMatchId), inLmSelect(false) { }
-
- InputLoc getLoc();
-
- Join *join;
- Action *action;
- InputLoc semiLoc;
-
- Action *setActId;
- Action *actOnLast;
- Action *actOnNext;
- Action *actLagBehind;
- Action *actNfaOnLast;
- Action *actNfaOnNext;
- Action *actNfaOnEof;
- int longestMatchId;
- bool inLmSelect;
- LongestMatch *longestMatch;
-
- LongestMatchPart *prev, *next;
-};
-
-/* Declare a new type so that ptreetypes.h need not include dlist.h. */
-struct LmPartList : DList<LongestMatchPart> {};
-
-struct LongestMatch
-{
- /* Construct with a list of joins */
- LongestMatch( const InputLoc &loc, LmPartList *longestMatchList )
- :
- loc(loc),
- longestMatchList(longestMatchList),
- lmSwitchHandlesError(false),
- nfaConstruction(false)
- { }
-
- InputLoc loc;
- LmPartList *longestMatchList;
- std::string name;
- Action *lmActSelect;
- bool lmSwitchHandlesError;
- bool nfaConstruction;
-
- LongestMatch *next, *prev;
-
- /* Tree traversal. */
- FsmRes walkClassic( ParseData *pd );
- FsmRes walk( ParseData *pd );
-
- FsmRes mergeNfaStates( ParseData *pd, FsmAp *fsm );
- bool onlyOneNfa( ParseData *pd, FsmAp *fsm, StateAp *st, NfaTrans *in );
- bool matchCanFail( ParseData *pd, FsmAp *fsm, StateAp *st );
- void eliminateNfaActions( ParseData *pd, FsmAp *fsm );
- void advanceNfaActions( ParseData *pd, FsmAp *fsm );
- FsmRes buildBaseNfa( ParseData *pd );
- FsmRes walkNfa( ParseData *pd );
-
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
- void transferScannerLeavingActions( FsmAp *graph );
- void runLongestMatch( ParseData *pd, FsmAp *graph );
- Action *newLmAction( ParseData *pd, const InputLoc &loc, const char *name,
- InlineList *inlineList );
- void makeActions( ParseData *pd );
- void findName( ParseData *pd );
- void restart( FsmAp *graph, TransAp *trans );
- void restart( FsmAp *graph, CondAp *cond );
-};
-
-
-/* List of Expressions. */
-typedef DList<Expression> ExprList;
-
-struct MachineDef
-{
- enum Type {
- JoinType,
- LongestMatchType,
- LengthDefType,
- NfaUnionType
- };
-
- MachineDef( Join *join )
- : join(join), longestMatch(0), lengthDef(0), nfaUnion(0),
- type(JoinType) {}
-
- MachineDef( LongestMatch *longestMatch )
- : join(0), longestMatch(longestMatch), lengthDef(0), nfaUnion(0),
- type(LongestMatchType) {}
-
- MachineDef( LengthDef *lengthDef )
- : join(0), longestMatch(0), lengthDef(lengthDef), nfaUnion(0),
- type(LengthDefType) {}
-
- MachineDef( NfaUnion *nfaUnion )
- : join(0), longestMatch(0), lengthDef(0), nfaUnion(nfaUnion),
- type(NfaUnionType) {}
-
- ~MachineDef();
-
- FsmRes walk( ParseData *pd );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
-
- Join *join;
- LongestMatch *longestMatch;
- LengthDef *lengthDef;
- NfaUnion *nfaUnion;
- Type type;
-};
-
-/*
- * Join
- */
-struct Join
-{
- /* Construct with the first expression. */
- Join( Expression *expr );
- Join( const InputLoc &loc, Expression *expr );
-
- ~Join()
- {
- exprList.empty();
- }
-
- /* Tree traversal. */
- FsmRes walk( ParseData *pd );
- FsmRes walkJoin( ParseData *pd );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
-
- /* Data. */
- InputLoc loc;
- ExprList exprList;
-};
-
-/*
- * Expression
- */
-struct Expression
-{
- enum Type {
- OrType,
- IntersectType,
- SubtractType,
- StrongSubtractType,
- TermType,
- BuiltinType
- };
-
- /* Construct with an expression on the left and a term on the right. */
- Expression( Expression *expression, Term *term, Type type ) :
- expression(expression), term(term),
- type(type), prev(this), next(this) { }
-
- /* Construct with only a term. */
- Expression( Term *term ) :
- expression(0), term(term),
- type(TermType) , prev(this), next(this) { }
-
- /* Construct with a builtin type. */
- Expression( BuiltinMachine builtin ) :
- expression(0), term(0), builtin(builtin),
- type(BuiltinType), prev(this), next(this) { }
-
- ~Expression();
-
- /* Tree traversal. */
- FsmRes walk( ParseData *pd, bool lastInSeq = true );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
-
- /* Node data. */
- Expression *expression;
- Term *term;
- BuiltinMachine builtin;
- Type type;
-
- Expression *prev, *next;
-};
-
-typedef Vector<Term*> TermVect;
-
-/*
- * NfaUnion
- */
-struct NfaUnion
-{
- /* Construct with only a term. */
- NfaUnion() : roundsList(0) { }
- ~NfaUnion();
-
- /* Tree traversal. */
- FsmRes walk( ParseData *pd );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
-
- /* Node data. */
- TermVect terms;
- NfaRoundVect *roundsList;
-};
-
-
-/*
- * Term
- */
-struct Term
-{
- enum Type {
- ConcatType,
- RightStartType,
- RightFinishType,
- LeftType,
- FactorWithAugType
- };
-
- Term( Term *term, FactorWithAug *factorWithAug ) :
- term(term), factorWithAug(factorWithAug), type(ConcatType) { }
-
- Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
- term(term), factorWithAug(factorWithAug), type(type) { }
-
- Term( Action *action1, Action *action2, Action *action3,
- Term *term, FactorWithAug *factorWithAug,
- FactorWithAug *factorWithAug2, Type type )
- :
- action1(action1), action2(action2), action3(action3),
- term(term), factorWithAug(factorWithAug),
- factorWithAug2(factorWithAug2), type(type)
- { }
-
- Term( FactorWithAug *factorWithAug ) :
- term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
-
- ~Term();
-
- FsmRes walk( ParseData *pd, bool lastInSeq = true );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
-
- Action *action1;
- Action *action2;
- Action *action3;
-
- Term *term;
- FactorWithAug *factorWithAug;
- FactorWithAug *factorWithAug2;
- Type type;
-
- /* Priority descriptor for RightFinish type. */
- PriorDesc priorDescs[2];
-};
-
-
-/* Third level of precedence. Augmenting nodes with actions and priorities. */
-struct FactorWithAug
-{
- FactorWithAug( FactorWithRep *factorWithRep )
- :
- priorDescs(0),
- factorWithRep(factorWithRep)
- {}
-
- ~FactorWithAug();
-
- /* Tree traversal. */
- FsmRes walk( ParseData *pd );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
-
- void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
- void assignPriorities( FsmAp *graph, int *priorOrd );
-
- void assignConditions( FsmAp *graph );
-
- /* Actions and priorities assigned to the factor node. */
- Vector<ParserAction> actions;
- Vector<PriorityAug> priorityAugs;
- PriorDesc *priorDescs;
- std::vector<Label> labels;
- Vector<EpsilonLink> epsilonLinks;
- Vector<ConditionTest> conditions;
-
- FactorWithRep *factorWithRep;
-};
-
-/* Fourth level of precedence. Trailing unary operators. Provide kleen star,
- * optional and plus. */
-struct FactorWithRep
-{
- enum Type {
- StarType,
- StarStarType,
- OptionalType,
- PlusType,
- ExactType,
- MaxType,
- MinType,
- RangeType,
- FactorWithNegType
- };
-
- FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
- int lowerRep, int upperRep, Type type )
- :
- loc(loc), repId(0), factorWithRep(factorWithRep),
- factorWithNeg(0), lowerRep(lowerRep),
- upperRep(upperRep), type(type)
- {}
-
- FactorWithRep( FactorWithNeg *factorWithNeg )
- : factorWithNeg(factorWithNeg), type(FactorWithNegType)
- {}
-
- ~FactorWithRep();
-
- /* Tree traversal. */
- FsmRes walk( ParseData *pd );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
-
- InputLoc loc;
- long long repId;
- FactorWithRep *factorWithRep;
- FactorWithNeg *factorWithNeg;
- int lowerRep, upperRep;
- Type type;
-
- /* Priority descriptor for StarStar type. */
- PriorDesc priorDescs[4];
-};
-
-/* Fifth level of precedence. Provides Negation. */
-struct FactorWithNeg
-{
- enum Type {
- NegateType,
- CharNegateType,
- FactorType
- };
-
- FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
- loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }
-
- FactorWithNeg( Factor *factor ) :
- factorWithNeg(0), factor(factor), type(FactorType) { }
-
- ~FactorWithNeg();
-
- /* Tree traversal. */
- FsmRes walk( ParseData *pd );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
-
- InputLoc loc;
- FactorWithNeg *factorWithNeg;
- Factor *factor;
- Type type;
-};
-
-/*
- * Factor
- */
-struct Factor
-{
- /* Language elements a factor node can be. */
- enum Type {
- LiteralType,
- RangeType,
- OrExprType,
- RegExprType,
- ReferenceType,
- ParenType,
- LongestMatchType,
- NfaRep,
- NfaWrap,
- CondStar,
- CondPlus
- };
-
- enum NfaRepeatMode {
- NfaLegacy = 1,
- NfaGreedy,
- NfaLazy
- };
-
- /* Construct with a literal fsm. */
- Factor( Literal *literal ) :
- literal(literal), type(LiteralType) { }
-
- /* Construct with a range. */
- Factor( Range *range ) :
- range(range), type(RangeType) { }
-
- /* Construct with the or part of a regular expression. */
- Factor( ReItem *reItem ) :
- reItem(reItem), type(OrExprType) { }
-
- /* Construct with a regular expression. */
- Factor( RegExpr *regExpr ) :
- regExpr(regExpr), type(RegExprType) { }
-
- /* Construct with a reference to a var def. */
- Factor( const InputLoc &loc, VarDef *varDef ) :
- loc(loc), varDef(varDef), type(ReferenceType) {}
-
- /* Construct with a parenthesized join. */
- Factor( Join *join ) :
- join(join), type(ParenType) {}
-
- /* Construct with a longest match operator. */
- Factor( LongestMatch *longestMatch ) :
- longestMatch(longestMatch), type(LongestMatchType) {}
-
- Factor( const InputLoc &loc, long long repId, Expression *expression,
- Action *action1, Action *action2, Action *action3,
- Action *action4, Action *action5, Action *action6, Type type )
- :
- loc(loc), repId(repId), expression(expression),
- action1(action1), action2(action2), action3(action3),
- action4(action4), action5(action5), action6(action6),
- type(type)
- {}
-
- /* Cleanup. */
- ~Factor();
-
- /* Tree traversal. */
- FsmRes walk( ParseData *pd );
- void makeNameTree( ParseData *pd );
- void resolveNameRefs( ParseData *pd );
-
- InputLoc loc;
- Literal *literal;
- Range *range;
- ReItem *reItem;
- RegExpr *regExpr;
- VarDef *varDef;
- Join *join;
- LongestMatch *longestMatch;
- int lower, upper;
- long repId;
- Expression *expression;
- Action *action1;
- Action *action2;
- Action *action3;
- Action *action4;
- Action *action5;
- Action *action6;
- PriorDesc priorDescs[4];
- NfaRepeatMode mode;
-
- Type type;
-};
-
-/* A range machine. Only ever composed of two literals. */
-struct Range
-{
- Range( Literal *lowerLit, Literal *upperLit, bool caseIndep )
- : lowerLit(lowerLit), upperLit(upperLit), caseIndep(caseIndep) { }
-
- ~Range();
- FsmAp *walk( ParseData *pd );
-
- Literal *lowerLit;
- Literal *upperLit;
- bool caseIndep;
-};
-
-/* Some literal machine. Can be a number or literal string. */
-struct Literal
-{
- enum LiteralType { Number, LitString, HexString };
-
- Literal( const InputLoc &loc, bool neg, const char *_data, int len, LiteralType type )
- : loc(loc), neg(neg), type(type)
- {
- data.append( _data, len );
- }
-
- FsmAp *walk( ParseData *pd );
-
- InputLoc loc;
- bool neg;
- Vector<char> data;
- LiteralType type;
-};
-
-/* Regular expression. */
-struct RegExpr
-{
- enum RegExpType { RecurseItem, Empty };
-
- /* Constructors. */
- RegExpr() :
- type(Empty), caseInsensitive(false) { }
- RegExpr(RegExpr *regExpr, ReItem *item) :
- regExpr(regExpr), item(item),
- type(RecurseItem), caseInsensitive(false) { }
-
- ~RegExpr();
- FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
-
- RegExpr *regExpr;
- ReItem *item;
- RegExpType type;
- bool caseInsensitive;
-};
-
-/* An item in a regular expression. */
-struct ReItem
-{
- enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
-
- ReItem( const InputLoc &loc, const char *_data, int len )
- :
- loc(loc), star(false), type(Data)
- {
- data.append( _data, len );
- }
-
- ReItem( const InputLoc &loc, ReItemType type )
- : loc(loc), star(false), type(type) { }
-
- ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
- : loc(loc), orBlock(orBlock), star(false), type(type) { }
-
- ~ReItem();
- FsmRes walk( ParseData *pd, RegExpr *rootRegex );
-
- InputLoc loc;
- Vector<char> data;
- ReOrBlock *orBlock;
- bool star;
- ReItemType type;
-};
-
-/* An or block item. */
-struct ReOrBlock
-{
- enum ReOrBlockType { RecurseItem, Empty };
-
- /* Constructors. */
- ReOrBlock()
- : type(Empty) { }
- ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
- : orBlock(orBlock), item(item), type(RecurseItem) { }
-
- ~ReOrBlock();
- FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
-
- ReOrBlock *orBlock;
- ReOrItem *item;
- ReOrBlockType type;
-};
-
-/* An item in an or block. */
-struct ReOrItem
-{
- enum ReOrItemType { Data, Range };
-
- ReOrItem( const InputLoc &loc, const char *_data, int len )
- :
- loc(loc), type(Data)
- {
- data.append( _data, len );
- }
-
- ReOrItem( const InputLoc &loc, char lower, char upper )
- : loc(loc), lower(lower), upper(upper), type(Range) { }
-
- FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
-
- InputLoc loc;
- Vector<char> data;
- char lower;
- char upper;
- ReOrItemType type;
-};
-
-
-#endif