diff options
author | Glenn Strauss <gstrauss@gluelogic.com> | 2023-01-09 00:26:52 -0500 |
---|---|---|
committer | Glenn Strauss <gstrauss@gluelogic.com> | 2023-01-09 00:34:50 -0500 |
commit | f9d60e3e5eaed5bf434597fa3923fc06ddd67401 (patch) | |
tree | f10dd9e0d8f7fe1de3a406aed391e920bcf874e1 /src/lemon.c | |
parent | 25f5085a8e213308fa51218f7ac12ddc2437902e (diff) | |
download | lighttpd-git-f9d60e3e5eaed5bf434597fa3923fc06ddd67401.tar.gz |
[lemon] upgrade LEMON parser to SQLite maint ver
The LEMON parser is maintained as part of SQLite
https://www.sqlite.org/src/file/tool/lemon.c
https://www.sqlite.org/src/file/tool/lempar.c
(committed files are directly from above,
but with excess whitespace removed before line ends)
Diffstat (limited to 'src/lemon.c')
-rw-r--r-- | src/lemon.c | 3855 |
1 files changed, 2652 insertions, 1203 deletions
diff --git a/src/lemon.c b/src/lemon.c index 5d35db71..869ac580 100644 --- a/src/lemon.c +++ b/src/lemon.c @@ -1,15 +1,3 @@ -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif - -#ifdef __COVERITY__ -#define _Float128 long double -#define _Float64x long double -#define _Float64 double -#define _Float32x double -#define _Float32 float -#endif - /* ** This file contains all sources (including headers) to the LEMON ** LALR(1) parser generator. The sources have been combined into a @@ -23,25 +11,36 @@ #include <string.h> #include <ctype.h> #include <stdlib.h> -#include <inttypes.h> -#include <unistd.h> /* access() */ +#include <assert.h> + +#define ISSPACE(X) isspace((unsigned char)(X)) +#define ISDIGIT(X) isdigit((unsigned char)(X)) +#define ISALNUM(X) isalnum((unsigned char)(X)) +#define ISALPHA(X) isalpha((unsigned char)(X)) +#define ISUPPER(X) isupper((unsigned char)(X)) +#define ISLOWER(X) islower((unsigned char)(X)) -#define UNUSED(x) ( (void)(x) ) #ifndef __WIN32__ # if defined(_WIN32) || defined(WIN32) -# define __WIN32__ +# define __WIN32__ # endif #endif -#if __GNUC__ > 2 -#define NORETURN __attribute__ ((__noreturn__)) +#ifdef __WIN32__ +#ifdef __cplusplus +extern "C" { +#endif +extern int access(const char *path, int mode); +#ifdef __cplusplus +} +#endif #else -#define NORETURN +#include <unistd.h> #endif /* #define PRIVATE static */ -#define PRIVATE static +#define PRIVATE #ifdef TEST #define MAXRHS 5 /* Set low to exercise exception code */ @@ -49,84 +48,187 @@ #define MAXRHS 1000 #endif -void *msort(void *list, void **next, int(*cmp)(void *, void *)); +extern void memory_error(); +static int showPrecedenceConflict = 0; +static char *msort(char*,char**,int(*)(const char*,const char*)); -static void memory_error() NORETURN; +/* +** Compilers are getting increasingly pedantic about type conversions +** as C evolves ever closer to Ada.... To work around the latest problems +** we have to define the following variant of strlen(). +*/ +#define lemonStrlen(X) ((int)strlen(X)) -/******** From the file "action.h" *************************************/ -struct action *Action_new(); -struct action *Action_sort(); -void Action_add(); +/* +** Compilers are starting to complain about the use of sprintf() and strcpy(), +** saying they are unsafe. So we define our own versions of those routines too. +** +** There are three routines here: lemon_sprintf(), lemon_vsprintf(), and +** lemon_addtext(). The first two are replacements for sprintf() and vsprintf(). +** The third is a helper routine for vsnprintf() that adds texts to the end of a +** buffer, making sure the buffer is always zero-terminated. +** +** The string formatter is a minimal subset of stdlib sprintf() supporting only +** a few simply conversions: +** +** %d +** %s +** %.*s +** +*/ +static void lemon_addtext( + char *zBuf, /* The buffer to which text is added */ + int *pnUsed, /* Slots of the buffer used so far */ + const char *zIn, /* Text to add */ + int nIn, /* Bytes of text to add. -1 to use strlen() */ + int iWidth /* Field width. Negative to left justify */ +){ + if( nIn<0 ) for(nIn=0; zIn[nIn]; nIn++){} + while( iWidth>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth--; } + if( nIn==0 ) return; + memcpy(&zBuf[*pnUsed], zIn, nIn); + *pnUsed += nIn; + while( (-iWidth)>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth++; } + zBuf[*pnUsed] = 0; +} +static int lemon_vsprintf(char *str, const char *zFormat, va_list ap){ + int i, j, k, c; + int nUsed = 0; + const char *z; + char zTemp[50]; + str[0] = 0; + for(i=j=0; (c = zFormat[i])!=0; i++){ + if( c=='%' ){ + int iWidth = 0; + lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); + c = zFormat[++i]; + if( ISDIGIT(c) || (c=='-' && ISDIGIT(zFormat[i+1])) ){ + if( c=='-' ) i++; + while( ISDIGIT(zFormat[i]) ) iWidth = iWidth*10 + zFormat[i++] - '0'; + if( c=='-' ) iWidth = -iWidth; + c = zFormat[i]; + } + if( c=='d' ){ + int v = va_arg(ap, int); + if( v<0 ){ + lemon_addtext(str, &nUsed, "-", 1, iWidth); + v = -v; + }else if( v==0 ){ + lemon_addtext(str, &nUsed, "0", 1, iWidth); + } + k = 0; + while( v>0 ){ + k++; + zTemp[sizeof(zTemp)-k] = (v%10) + '0'; + v /= 10; + } + lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp)-k], k, iWidth); + }else if( c=='s' ){ + z = va_arg(ap, const char*); + lemon_addtext(str, &nUsed, z, -1, iWidth); + }else if( c=='.' && memcmp(&zFormat[i], ".*s", 3)==0 ){ + i += 2; + k = va_arg(ap, int); + z = va_arg(ap, const char*); + lemon_addtext(str, &nUsed, z, k, iWidth); + }else if( c=='%' ){ + lemon_addtext(str, &nUsed, "%", 1, 0); + }else{ + fprintf(stderr, "illegal format\n"); + exit(1); + } + j = i+1; + } + } + lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); + return nUsed; +} +static int lemon_sprintf(char *str, const char *format, ...){ + va_list ap; + int rc; + va_start(ap, format); + rc = lemon_vsprintf(str, format, ap); + va_end(ap); + return rc; +} +static void lemon_strcpy(char *dest, const char *src){ + while( (*(dest++) = *(src++))!=0 ){} +} +static void lemon_strcat(char *dest, const char *src){ + while( *dest ) dest++; + lemon_strcpy(dest, src); +} -/********* From the file "assert.h" ************************************/ -void myassert() NORETURN; -#ifndef NDEBUG -# define assert(X) if(!(X))myassert(__FILE__,__LINE__) -#else -# define assert(X) -#endif + +/* a few forward declarations... */ +struct rule; +struct lemon; +struct action; + +static struct action *Action_new(void); +static struct action *Action_sort(struct action *); /********** From the file "build.h" ************************************/ -void FindRulePrecedences(); -void FindFirstSets(); -void FindStates(); -void FindLinks(); -void FindFollowSets(); -void FindActions(); +void FindRulePrecedences(struct lemon*); +void FindFirstSets(struct lemon*); +void FindStates(struct lemon*); +void FindLinks(struct lemon*); +void FindFollowSets(struct lemon*); +void FindActions(struct lemon*); /********* From the file "configlist.h" *********************************/ -void Configlist_init(/* void */); -struct config *Configlist_add(/* struct rule *, int */); -struct config *Configlist_addbasis(/* struct rule *, int */); -void Configlist_closure(/* void */); -void Configlist_sort(/* void */); -void Configlist_sortbasis(/* void */); -struct config *Configlist_return(/* void */); -struct config *Configlist_basis(/* void */); -void Configlist_eat(/* struct config * */); -void Configlist_reset(/* void */); +void Configlist_init(void); +struct config *Configlist_add(struct rule *, int); +struct config *Configlist_addbasis(struct rule *, int); +void Configlist_closure(struct lemon *); +void Configlist_sort(void); +void Configlist_sortbasis(void); +struct config *Configlist_return(void); +struct config *Configlist_basis(void); +void Configlist_eat(struct config *); +void Configlist_reset(void); /********* From the file "error.h" ***************************************/ void ErrorMsg(const char *, int,const char *, ...); /****** From the file "option.h" ******************************************/ +enum option_type { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, + OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR}; struct s_options { - enum { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, - OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR} type; - char *label; - void *arg; + enum option_type type; + const char *label; + char *arg; const char *message; }; -int OptInit(/* char**,struct s_options*,FILE* */); -int OptNArgs(/* void */); -char *OptArg(/* int */); -void OptErr(/* int */); -void OptPrint(/* void */); +int OptInit(char**,struct s_options*,FILE*); +int OptNArgs(void); +char *OptArg(int); +void OptErr(int); +void OptPrint(void); /******** From the file "parse.h" *****************************************/ -void Parse(/* struct lemon *lemp */); +void Parse(struct lemon *lemp); /********* From the file "plink.h" ***************************************/ -struct plink *Plink_new(/* void */); -void Plink_add(/* struct plink **, struct config * */); -void Plink_copy(/* struct plink **, struct plink * */); -void Plink_delete(/* struct plink * */); +struct plink *Plink_new(void); +void Plink_add(struct plink **, struct config *); +void Plink_copy(struct plink **, struct plink *); +void Plink_delete(struct plink *); /********** From the file "report.h" *************************************/ -void Reprint(/* struct lemon * */); -void ReportOutput(/* struct lemon * */); -void ReportTable(/* struct lemon * */); -void ReportHeader(/* struct lemon * */); -void CompressTables(/* struct lemon * */); +void Reprint(struct lemon *); +void ReportOutput(struct lemon *); +void ReportTable(struct lemon *, int, int); +void ReportHeader(struct lemon *); +void CompressTables(struct lemon *); +void ResortStates(struct lemon *); /********** From the file "set.h" ****************************************/ -void SetSize(/* int N */); /* All sets will be of size N */ -char *SetNew(/* void */); /* A new set for element 0..N */ -void SetFree(/* char* */); /* Deallocate a set */ - -int SetAdd(/* char*,int */); /* Add element to a set */ -int SetUnion(/* char *A,char *B */); /* A <- A U B, through element N */ - +void SetSize(int); /* All sets will be of size N */ +char *SetNew(void); /* A new set for element 0..N */ +void SetFree(char*); /* Deallocate a set */ +int SetAdd(char*,int); /* Add element to a set */ +int SetUnion(char *,char *); /* A <- A U B, thru element N */ #define SetFind(X,Y) (X[Y]) /* True if Y is in set X */ /********** From the file "struct.h" *************************************/ @@ -134,52 +236,71 @@ int SetUnion(/* char *A,char *B */); /* A <- A U B, through element N */ ** Principal data structures for the LEMON parser generator. */ -typedef enum {Bo_FALSE=0, Bo_TRUE} Boolean; +typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean; /* Symbols (terminals and nonterminals) of the grammar are stored ** in the following: */ +enum symbol_type { + TERMINAL, + NONTERMINAL, + MULTITERMINAL +}; +enum e_assoc { + LEFT, + RIGHT, + NONE, + UNK +}; struct symbol { - char *name; /* Name of the symbol */ + const char *name; /* Name of the symbol */ int index; /* Index number for this symbol */ - enum { - TERMINAL, - NONTERMINAL - } type; /* Symbols are all either TERMINALS or NTs */ + enum symbol_type type; /* Symbols are all either TERMINALS or NTs */ struct rule *rule; /* Linked list of rules of this (if an NT) */ struct symbol *fallback; /* fallback token in case this token doesn't parse */ int prec; /* Precedence if defined (-1 otherwise) */ - enum e_assoc { - LEFT, - RIGHT, - NONE, - UNK - } assoc; /* Associativity if predecence is defined */ + enum e_assoc assoc; /* Associativity if precedence is defined */ char *firstset; /* First-set for all rules of this symbol */ Boolean lambda; /* True if NT and can generate an empty string */ + int useCnt; /* Number of times used */ char *destructor; /* Code which executes whenever this symbol is ** popped from the stack during error processing */ - int destructorln; /* Line number of destructor code */ + int destLineno; /* Line number for start of destructor. Set to + ** -1 for duplicate destructors. */ char *datatype; /* The data type of information held by this ** object. Only used if type==NONTERMINAL */ int dtnum; /* The data type number. In the parser, the value ** stack is a union. The .yy%d element of this ** union is the correct data type for this object */ + int bContent; /* True if this symbol ever carries content - if + ** it is ever more than just syntax */ + /* The following fields are used by MULTITERMINALs only */ + int nsubsym; /* Number of constituent symbols in the MULTI */ + struct symbol **subsym; /* Array of constituent symbols */ }; /* Each production rule in the grammar is stored in the following ** structure. */ struct rule { struct symbol *lhs; /* Left-hand side of the rule */ - char *lhsalias; /* Alias for the LHS (NULL if none) */ + const char *lhsalias; /* Alias for the LHS (NULL if none) */ + int lhsStart; /* True if left-hand side is the start symbol */ int ruleline; /* Line number for the rule */ int nrhs; /* Number of RHS symbols */ struct symbol **rhs; /* The RHS symbols */ - char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ + const char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ int line; /* Line number at which code begins */ - char *code; /* The code executed when this rule is reduced */ + const char *code; /* The code executed when this rule is reduced */ + const char *codePrefix; /* Setup code before code[] above */ + const char *codeSuffix; /* Breakdown code after code[] above */ struct symbol *precsym; /* Precedence symbol for this rule */ int index; /* An index number for this rule */ + int iRule; /* Rule number as used in the generated tables */ + Boolean noCode; /* True if this rule has no associated C code */ + Boolean codeEmitted; /* True if the code has been emitted already */ Boolean canReduce; /* True if this rule is ever reduced */ + Boolean doesReduce; /* Reduce actions occur after optimization */ + Boolean neverReduce; /* Reduce is theoretically possible, but prevented + ** by actions or other outside implementation */ struct rule *nextlhs; /* Next rule with the same LHS */ struct rule *next; /* Next rule in the global list */ }; @@ -189,6 +310,10 @@ struct rule { ** Configurations also contain a follow-set which is a list of terminal ** symbols which are allowed to immediately follow the end of the rule. ** Every configuration is recorded as an instance of the following: */ +enum cfgstatus { + COMPLETE, + INCOMPLETE +}; struct config { struct rule *rp; /* The rule upon which the configuration is based */ int dot; /* The parse point */ @@ -196,31 +321,34 @@ struct config { struct plink *fplp; /* Follow-set forward propagation links */ struct plink *bplp; /* Follow-set backwards propagation links */ struct state *stp; /* Pointer to state which contains this */ - enum { - COMPLETE, /* The status is used during followset and */ - INCOMPLETE /* shift computations */ - } status; + enum cfgstatus status; /* used during followset and shift computations */ struct config *next; /* Next configuration in the state */ struct config *bp; /* The next basis configuration */ }; +enum e_action { + SHIFT, + ACCEPT, + REDUCE, + ERROR, + SSCONFLICT, /* A shift/shift conflict */ + SRCONFLICT, /* Was a reduce, but part of a conflict */ + RRCONFLICT, /* Was a reduce, but part of a conflict */ + SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ + RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ + NOT_USED, /* Deleted by compression */ + SHIFTREDUCE /* Shift first, then reduce */ +}; + /* Every shift or reduce operation is stored as one of the following */ struct action { struct symbol *sp; /* The look-ahead symbol */ - enum e_action { - SHIFT, - ACCEPT, - REDUCE, - ERROR, - CONFLICT, /* Was a reduce, but part of a conflict */ - SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ - RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ - NOT_USED /* Deleted by compression */ - } type; + enum e_action type; union { struct state *stp; /* The new state, if a shift */ struct rule *rp; /* The rule, if a reduce */ } x; + struct symbol *spOpt; /* SHIFTREDUCE optimization to this symbol */ struct action *next; /* Next action for this state */ struct action *collide; /* Next action with the same hash */ }; @@ -230,11 +358,13 @@ struct action { struct state { struct config *bp; /* The basis configurations for this state */ struct config *cfp; /* All configurations in this set */ - int index; /* Sequential number for this state */ - struct action *ap; /* Array of actions for this state */ + int statenum; /* Sequential number for this state */ + struct action *ap; /* List of actions for this state */ int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */ int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */ - int iDflt; /* Default action */ + int iDfltReduce; /* Default action is to REDUCE by this rule */ + struct rule *pDfltReduce;/* The default REDUCE rule. */ + int autoReduce; /* True if this is an auto-reduce state */ }; #define NO_OFFSET (-2147483647) @@ -253,47 +383,54 @@ struct plink { struct lemon { struct state **sorted; /* Table of states sorted by state number */ struct rule *rule; /* List of all rules */ + struct rule *startRule; /* First rule */ int nstate; /* Number of states */ + int nxstate; /* nstate with tail degenerate states removed */ int nrule; /* Number of rules */ + int nruleWithAction; /* Number of rules with actions */ int nsymbol; /* Number of terminal and nonterminal symbols */ int nterminal; /* Number of terminal symbols */ + int minShiftReduce; /* Minimum shift-reduce action value */ + int errAction; /* Error action value */ + int accAction; /* Accept action value */ + int noAction; /* No-op action value */ + int minReduce; /* Minimum reduce action */ + int maxAction; /* Maximum action value of any kind */ struct symbol **symbols; /* Sorted array of pointers to symbols */ int errorcnt; /* Number of errors */ struct symbol *errsym; /* The error symbol */ + struct symbol *wildcard; /* Token that matches anything */ char *name; /* Name of the generated parser */ - char *arg; /* Declaration of the 3th argument to parser */ + char *arg; /* Declaration of the 3rd argument to parser */ + char *ctx; /* Declaration of 2nd argument to constructor */ char *tokentype; /* Type of terminal symbols in the parser stack */ char *vartype; /* The default type of non-terminal symbols */ char *start; /* Name of the start symbol for the grammar */ char *stacksize; /* Size of the parser stack */ char *include; /* Code to put at the start of the C file */ - int includeln; /* Line number for start of include code */ char *error; /* Code to execute when an error is seen */ - int errorln; /* Line number for start of error code */ char *overflow; /* Code to execute on a stack overflow */ - int overflowln; /* Line number for start of overflow code */ char *failure; /* Code to execute on parser failure */ - int failureln; /* Line number for start of failure code */ char *accept; /* Code to execute when the parser excepts */ - int acceptln; /* Line number for the start of accept code */ char *extracode; /* Code appended to the generated file */ - int extracodeln; /* Line number for the start of the extra code */ char *tokendest; /* Code to execute to destroy token data */ - int tokendestln; /* Line number for token destroyer code */ char *vardest; /* Code for the default non-terminal destructor */ - int vardestln; /* Line number for default non-term destructor code*/ char *filename; /* Name of the input file */ - char *tmplname; /* Name of the template file */ char *outname; /* Name of the current output file */ char *tokenprefix; /* A prefix added to token names in the .h file */ int nconflict; /* Number of parsing conflicts */ - int tablesize; /* Size of the parse tables */ + int nactiontab; /* Number of entries in the yy_action[] table */ + int nlookaheadtab; /* Number of entries in yy_lookahead[] */ + int tablesize; /* Total table size of all tables in bytes */ int basisflag; /* Print only basis configurations */ + int printPreprocessed; /* Show preprocessor output on stdout */ int has_fallback; /* True if any %fallback is seen in the grammar */ + int nolinenosflag; /* True if #line statements should not be printed */ char *argv0; /* Name of the program */ }; #define MemoryCheck(X) if((X)==0){ \ + extern void memory_error(); \ memory_error(); \ } @@ -309,108 +446,115 @@ struct lemon { /* ** Code for processing tables in the LEMON parser generator. */ - /* Routines for handling a strings */ -char *Strsafe(); +const char *Strsafe(const char *); -void Strsafe_init(/* void */); -int Strsafe_insert(/* char * */); -char *Strsafe_find(/* char * */); +void Strsafe_init(void); +int Strsafe_insert(const char *); +const char *Strsafe_find(const char *); /* Routines for handling symbols of the grammar */ -struct symbol *Symbol_new(); -int Symbolcmpp(/* struct symbol **, struct symbol ** */); -void Symbol_init(/* void */); -int Symbol_insert(/* struct symbol *, char * */); -struct symbol *Symbol_find(/* char * */); -struct symbol *Symbol_Nth(/* int */); -int Symbol_count(/* */); -int State_count(void); -struct symbol **Symbol_arrayof(/* */); +struct symbol *Symbol_new(const char *); +int Symbolcmpp(const void *, const void *); +void Symbol_init(void); +int Symbol_insert(struct symbol *, const char *); +struct symbol *Symbol_find(const char *); +struct symbol *Symbol_Nth(int); +int Symbol_count(void); +struct symbol **Symbol_arrayof(void); /* Routines to manage the state table */ -int Configcmp(/* struct config *, struct config * */); -struct state *State_new(); -void State_init(/* void */); -int State_insert(/* struct state *, struct config * */); -struct state *State_find(/* struct config * */); -struct state **State_arrayof(/* */); +int Configcmp(const char *, const char *); +struct state *State_new(void); +void State_init(void); +int State_insert(struct state *, struct config *); +struct state *State_find(struct config *); +struct state **State_arrayof(void); /* Routines used for efficiency in Configlist_add */ -void Configtable_init(/* void */); -int Configtable_insert(/* struct config * */); -struct config *Configtable_find(/* struct config * */); -void Configtable_clear(/* int(*)(struct config *) */); +void Configtable_init(void); +int Configtable_insert(struct config *); +struct config *Configtable_find(struct config *); +void Configtable_clear(int(*)(struct config *)); + /****************** From the file "action.c" *******************************/ /* ** Routines processing parser actions in the LEMON parser generator. */ /* Allocate a new parser action */ -struct action *Action_new(){ - static struct action *freelist = NULL; - struct action *new; +static struct action *Action_new(void){ + static struct action *actionfreelist = 0; + struct action *newaction; - if( freelist==NULL ){ + if( actionfreelist==0 ){ int i; int amt = 100; - freelist = (struct action *)malloc( sizeof(struct action)*amt ); - if( freelist==0 ){ + actionfreelist = (struct action *)calloc(amt, sizeof(struct action)); + if( actionfreelist==0 ){ fprintf(stderr,"Unable to allocate memory for a new parser action."); exit(1); } - for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1]; - freelist[amt-1].next = 0; + for(i=0; i<amt-1; i++) actionfreelist[i].next = &actionfreelist[i+1]; + actionfreelist[amt-1].next = 0; } - new = freelist; - freelist = freelist->next; - return new; + newaction = actionfreelist; + actionfreelist = actionfreelist->next; + return newaction; } -/* Compare two actions */ -static int actioncmp(ap1,ap2) -struct action *ap1; -struct action *ap2; -{ +/* Compare two actions for sorting purposes. Return negative, zero, or +** positive if the first action is less than, equal to, or greater than +** the first +*/ +static int actioncmp( + struct action *ap1, + struct action *ap2 +){ int rc; rc = ap1->sp->index - ap2->sp->index; - if( rc==0 ) rc = (int)ap1->type - (int)ap2->type; if( rc==0 ){ - assert( ap1->type==REDUCE || ap1->type==RD_RESOLVED || ap1->type==CONFLICT); - assert( ap2->type==REDUCE || ap2->type==RD_RESOLVED || ap2->type==CONFLICT); + rc = (int)ap1->type - (int)ap2->type; + } + if( rc==0 && (ap1->type==REDUCE || ap1->type==SHIFTREDUCE) ){ rc = ap1->x.rp->index - ap2->x.rp->index; } + if( rc==0 ){ + rc = (int) (ap2 - ap1); + } return rc; } /* Sort parser actions */ -struct action *Action_sort(ap) -struct action *ap; -{ - ap = (struct action *)msort(ap,(void **)&ap->next,actioncmp); +static struct action *Action_sort( + struct action *ap +){ + ap = (struct action *)msort((char *)ap,(char **)&ap->next, + (int(*)(const char*,const char*))actioncmp); return ap; } -void Action_add(app,type,sp,arg) -struct action **app; -enum e_action type; -struct symbol *sp; -void *arg; -{ - struct action *new; - new = Action_new(); - new->next = *app; - *app = new; - new->type = type; - new->sp = sp; +void Action_add( + struct action **app, + enum e_action type, + struct symbol *sp, + char *arg +){ + struct action *newaction; + newaction = Action_new(); + newaction->next = *app; + *app = newaction; + newaction->type = type; + newaction->sp = sp; + newaction->spOpt = 0; if( type==SHIFT ){ - new->x.stp = (struct state *)arg; + newaction->x.stp = (struct state *)arg; }else{ - new->x.rp = (struct rule *)arg; + newaction->x.rp = (struct rule *)arg; } } /********************** New code to implement the "acttab" module ***********/ @@ -420,26 +564,46 @@ void *arg; /* ** The state of the yy_action table under construction is an instance of -** the following structure +** the following structure. +** +** The yy_action table maps the pair (state_number, lookahead) into an +** action_number. The table is an array of integers pairs. The state_number +** determines an initial offset into the yy_action array. The lookahead +** value is then added to this initial offset to get an index X into the +** yy_action array. If the aAction[X].lookahead equals the value of the +** of the lookahead input, then the value of the action_number output is +** aAction[X].action. If the lookaheads do not match then the +** default action for the state_number is returned. +** +** All actions associated with a single state_number are first entered +** into aLookahead[] using multiple calls to acttab_action(). Then the +** actions for that single state_number are placed into the aAction[] +** array with a single call to acttab_insert(). The acttab_insert() call +** also resets the aLookahead[] array in preparation for the next +** state number. */ +struct lookahead_action { + int lookahead; /* Value of the lookahead token */ + int action; /* Action to take on the given lookahead */ +}; typedef struct acttab acttab; struct acttab { int nAction; /* Number of used slots in aAction[] */ int nActionAlloc; /* Slots allocated for aAction[] */ - struct { - int lookahead; /* Value of the lookahead token */ - int action; /* Action to take on the given lookahead */ - } *aAction, /* The yy_action[] table under construction */ + struct lookahead_action + *aAction, /* The yy_action[] table under construction */ *aLookahead; /* A single new transaction set */ int mnLookahead; /* Minimum aLookahead[].lookahead */ int mnAction; /* Action associated with mnLookahead */ int mxLookahead; /* Maximum aLookahead[].lookahead */ int nLookahead; /* Used slots in aLookahead[] */ int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ + int nterminal; /* Number of terminal symbols */ + int nsymbol; /* total number of symbols */ }; /* Return the number of entries in the yy_action table */ -#define acttab_size(X) ((X)->nAction) +#define acttab_lookahead_size(X) ((X)->nAction) /* The value for the N-th entry in yy_action */ #define acttab_yyaction(X,N) ((X)->aAction[N].action) @@ -448,29 +612,34 @@ struct acttab { #define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead) /* Free all memory associated with the given acttab */ -PRIVATE void acttab_free(acttab *p){ +void acttab_free(acttab *p){ free( p->aAction ); free( p->aLookahead ); free( p ); } /* Allocate a new acttab structure */ -PRIVATE acttab *acttab_alloc(void){ - acttab *p = malloc( sizeof(*p) ); +acttab *acttab_alloc(int nsymbol, int nterminal){ + acttab *p = (acttab *) calloc( 1, sizeof(*p) ); if( p==0 ){ fprintf(stderr,"Unable to allocate memory for a new acttab."); exit(1); } memset(p, 0, sizeof(*p)); + p->nsymbol = nsymbol; + p->nterminal = nterminal; return p; } -/* Add a new action to the current transaction set +/* Add a new action to the current transaction set. +** +** This routine is called once for each lookahead for a particular +** state. */ -PRIVATE void acttab_action(acttab *p, int lookahead, int action){ +void acttab_action(acttab *p, int lookahead, int action){ if( p->nLookahead>=p->nLookaheadAlloc ){ p->nLookaheadAlloc += 25; - p->aLookahead = realloc( p->aLookahead, + p->aLookahead = (struct lookahead_action *) realloc( p->aLookahead, sizeof(p->aLookahead[0])*p->nLookaheadAlloc ); if( p->aLookahead==0 ){ fprintf(stderr,"malloc failed\n"); @@ -499,20 +668,28 @@ PRIVATE void acttab_action(acttab *p, int lookahead, int action){ ** to an empty set in preparation for a new round of acttab_action() calls. ** ** Return the offset into the action table of the new transaction. +** +** If the makeItSafe parameter is true, then the offset is chosen so that +** it is impossible to overread the yy_lookaside[] table regardless of +** the lookaside token. This is done for the terminal symbols, as they +** come from external inputs and can contain syntax errors. When makeItSafe +** is false, there is more flexibility in selecting offsets, resulting in +** a smaller table. For non-terminal symbols, which are never syntax errors, +** makeItSafe can be false. */ -PRIVATE int acttab_insert(acttab *p){ - int i, j, k, n; +int acttab_insert(acttab *p, int makeItSafe){ + int i, j, k, n, end; assert( p->nLookahead>0 ); /* Make sure we have enough space to hold the expanded action table ** in the worst case. The worst case occurs if the transaction set ** must be appended to the current action table */ - n = p->mxLookahead + 1; + n = p->nsymbol + 1; if( p->nAction + n >= p->nActionAlloc ){ int oldAlloc = p->nActionAlloc; p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; - p->aAction = realloc( p->aAction, + p->aAction = (struct lookahead_action *) realloc( p->aAction, sizeof(p->aAction[0])*p->nActionAlloc); if( p->aAction==0 ){ fprintf(stderr,"malloc failed\n"); @@ -524,28 +701,17 @@ PRIVATE int acttab_insert(acttab *p){ } } - /* Scan the existing action table looking for an offset where we can - ** insert the current transaction set. Fall out of the loop when that - ** offset is found. In the worst case, we fall out of the loop when - ** i reaches p->nAction, which means we append the new transaction set. + /* Scan the existing action table looking for an offset that is a + ** duplicate of the current transaction set. Fall out of the loop + ** if and when the duplicate is found. ** ** i is the index in p->aAction[] where p->mnLookahead is inserted. */ - for(i=0; i<p->nAction+p->mnLookahead; i++){ - if( p->aAction[i].lookahead<0 ){ - for(j=0; j<p->nLookahead; j++){ - k = p->aLookahead[j].lookahead - p->mnLookahead + i; - if( k<0 ) break; - if( p->aAction[k].lookahead>=0 ) break; - } - if( j<p->nLookahead ) continue; - for(j=0; j<p->nAction; j++){ - if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break; - } - if( j==p->nAction ){ - break; /* Fits in empty slots */ - } - }else if( p->aAction[i].lookahead==p->mnLookahead ){ + end = makeItSafe ? p->mnLookahead : 0; + for(i=p->nAction-1; i>=end; i--){ + if( p->aAction[i].lookahead==p->mnLookahead ){ + /* All lookaheads and actions in the aLookahead[] transaction + ** must match against the candidate aAction[i] entry. */ if( p->aAction[i].action!=p->mnAction ) continue; for(j=0; j<p->nLookahead; j++){ k = p->aLookahead[j].lookahead - p->mnLookahead + i; @@ -554,22 +720,61 @@ PRIVATE int acttab_insert(acttab *p){ if( p->aLookahead[j].action!=p->aAction[k].action ) break; } if( j<p->nLookahead ) continue; + + /* No possible lookahead value that is not in the aLookahead[] + ** transaction is allowed to match aAction[i] */ n = 0; for(j=0; j<p->nAction; j++){ if( p->aAction[j].lookahead<0 ) continue; if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++; } if( n==p->nLookahead ){ - break; /* Same as a prior transaction set */ + break; /* An exact match is found at offset i */ + } + } + } + + /* If no existing offsets exactly match the current transaction, find an + ** an empty offset in the aAction[] table in which we can add the + ** aLookahead[] transaction. + */ + if( i<end ){ + /* Look for holes in the aAction[] table that fit the current + ** aLookahead[] transaction. Leave i set to the offset of the hole. + ** If no holes are found, i is left at p->nAction, which means the + ** transaction will be appended. */ + i = makeItSafe ? p->mnLookahead : 0; + for(; i<p->nActionAlloc - p->mxLookahead; i++){ + if( p->aAction[i].lookahead<0 ){ + for(j=0; j<p->nLookahead; j++){ + k = p->aLookahead[j].lookahead - p->mnLookahead + i; + if( k<0 ) break; + if( p->aAction[k].lookahead>=0 ) break; + } + if( j<p->nLookahead ) continue; + for(j=0; j<p->nAction; j++){ + if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break; + } + if( j==p->nAction ){ + break; /* Fits in empty slots */ + } } } } /* Insert transaction set at index i. */ +#if 0 + printf("Acttab:"); + for(j=0; j<p->nLookahead; j++){ + printf(" %d", p->aLookahead[j].lookahead); + } + printf(" inserted at %d\n", i); +#endif for(j=0; j<p->nLookahead; j++){ k = p->aLookahead[j].lookahead - p->mnLookahead + i; p->aAction[k] = p->aLookahead[j]; if( k>=p->nAction ) p->nAction = k+1; } + if( makeItSafe && i+p->nterminal>=p->nAction ) p->nAction = i+p->nterminal+1; p->nLookahead = 0; /* Return the offset that is added to the lookahead in order to get the @@ -577,17 +782,16 @@ PRIVATE int acttab_insert(acttab *p){ return i - p->mnLookahead; } -/********************** From the file "assert.c" ****************************/ /* -** A more efficient way of handling assertions. +** Return the size of the action table without the trailing syntax error +** entries. */ -void myassert(file,line) -char *file; -int line; -{ - fprintf(stderr,"Assertion failed on line %d of file \"%s\"\n",line,file); - exit(1); +int acttab_action_size(acttab *p){ + int n = p->nAction; + while( n>0 && p->aAction[n-1].lookahead<0 ){ n--; } + return n; } + /********************** From the file "build.c" *****************************/ /* ** Routines to construction the finite state machine for the LEMON @@ -603,18 +807,24 @@ int line; ** are not RHS symbols with a defined precedence, the precedence ** symbol field is left blank. */ -void FindRulePrecedences(xp) -struct lemon *xp; +void FindRulePrecedences(struct lemon *xp) { struct rule *rp; for(rp=xp->rule; rp; rp=rp->next){ if( rp->precsym==0 ){ - int i; - for(i=0; i<rp->nrhs; i++){ - if( rp->rhs[i]->prec>=0 ){ + int i, j; + for(i=0; i<rp->nrhs && rp->precsym==0; i++){ + struct symbol *sp = rp->rhs[i]; + if( sp->type==MULTITERMINAL ){ + for(j=0; j<sp->nsubsym; j++){ + if( sp->subsym[j]->prec>=0 ){ + rp->precsym = sp->subsym[j]; + break; + } + } + }else if( sp->prec>=0 ){ rp->precsym = rp->rhs[i]; - break; - } + } } } } @@ -626,15 +836,14 @@ struct lemon *xp; ** The first set is the set of all terminal symbols which can begin ** a string generated by that nonterminal. */ -void FindFirstSets(lemp) -struct lemon *lemp; +void FindFirstSets(struct lemon *lemp) { - int i; + int i, j; struct rule *rp; int progress; for(i=0; i<lemp->nsymbol; i++){ - lemp->symbols[i]->lambda = Bo_FALSE; + lemp->symbols[i]->lambda = LEMON_FALSE; } for(i=lemp->nterminal; i<lemp->nsymbol; i++){ lemp->symbols[i]->firstset = SetNew(); @@ -646,10 +855,12 @@ struct lemon *lemp; for(rp=lemp->rule; rp; rp=rp->next){ if( rp->lhs->lambda ) continue; for(i=0; i<rp->nrhs; i++){ - if( rp->rhs[i]->lambda==Bo_FALSE ) break; + struct symbol *sp = rp->rhs[i]; + assert( sp->type==NONTERMINAL || sp->lambda==LEMON_FALSE ); + if( sp->lambda==LEMON_FALSE ) break; } if( i==rp->nrhs ){ - rp->lhs->lambda = Bo_TRUE; + rp->lhs->lambda = LEMON_TRUE; progress = 1; } } @@ -666,12 +877,17 @@ struct lemon *lemp; if( s2->type==TERMINAL ){ progress += SetAdd(s1->firstset,s2->index); break; - }else if( s1==s2 ){ - if( s1->lambda==Bo_FALSE ) break; - }else{ + }else if( s2->type==MULTITERMINAL ){ + for(j=0; j<s2->nsubsym; j++){ + progress += SetAdd(s1->firstset,s2->subsym[j]->index); + } + break; + }else if( s1==s2 ){ + if( s1->lambda==LEMON_FALSE ) break; + }else{ progress += SetUnion(s1->firstset,s2->firstset); - if( s2->lambda==Bo_FALSE ) break; - } + if( s2->lambda==LEMON_FALSE ) break; + } } } }while( progress ); @@ -682,9 +898,8 @@ struct lemon *lemp; ** are added to between some states so that the LR(1) follow sets ** can be computed later. */ -PRIVATE struct state *getstate(/* struct lemon * */); /* forward reference */ -void FindStates(lemp) -struct lemon *lemp; +PRIVATE struct state *getstate(struct lemon *); /* forward reference */ +void FindStates(struct lemon *lemp) { struct symbol *sp; struct rule *rp; @@ -696,14 +911,17 @@ struct lemon *lemp; sp = Symbol_find(lemp->start); if( sp==0 ){ ErrorMsg(lemp->filename,0, -"The specified start symbol \"%s\" is not \ -in a nonterminal of the grammar. \"%s\" will be used as the start \ -symbol instead.",lemp->start,lemp->rule->lhs->name); + "The specified start symbol \"%s\" is not " + "in a nonterminal of the grammar. \"%s\" will be used as the start " + "symbol instead.",lemp->start,lemp->startRule->lhs->name); lemp->errorcnt++; - sp = lemp->rule->lhs; + sp = lemp->startRule->lhs; } + }else if( lemp->startRule ){ + sp = lemp->startRule->lhs; }else{ - sp = lemp->rule->lhs; + ErrorMsg(lemp->filename,0,"Internal error - no start rule\n"); + exit(1); } /* Make sure the start symbol doesn't occur on the right-hand side of @@ -712,11 +930,11 @@ symbol instead.",lemp->start,lemp->rule->lhs->name); for(rp=lemp->rule; rp; rp=rp->next){ int i; for(i=0; i<rp->nrhs; i++){ - if( rp->rhs[i]==sp ){ + if( rp->rhs[i]==sp ){ /* FIX ME: Deal with multiterminals */ ErrorMsg(lemp->filename,0, -"The start symbol \"%s\" occurs on the \ -right-hand side of a rule. This will result in a parser which \ -does not work properly.",sp->name); + "The start symbol \"%s\" occurs on the " + "right-hand side of a rule. This will result in a parser which " + "does not work properly.",sp->name); lemp->errorcnt++; } } @@ -727,6 +945,7 @@ does not work properly.",sp->name); ** left-hand side */ for(rp=sp->rule; rp; rp=rp->nextlhs){ struct config *newcfp; + rp->lhsStart = 1; newcfp = Configlist_addbasis(rp,0); SetAdd(newcfp->fws,0); } @@ -741,9 +960,8 @@ does not work properly.",sp->name); /* Return a pointer to a state which is described by the configuration ** list which has been built from calls to Configlist_add. */ -PRIVATE void buildshifts(/* struct lemon *, struct state * */); /* Forwd ref */ -PRIVATE struct state *getstate(lemp) -struct lemon *lemp; +PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */ +PRIVATE struct state *getstate(struct lemon *lemp) { struct config *cfp, *bp; struct state *stp; @@ -776,7 +994,7 @@ struct lemon *lemp; MemoryCheck(stp); stp->bp = bp; /* Remember the configuration basis */ stp->cfp = cfp; /* Remember the configuration closure */ - stp->index = lemp->nstate++; /* Every state gets a sequence number */ + stp->statenum = lemp->nstate++; /* Every state gets a sequence number */ stp->ap = 0; /* No actions, yet. */ State_insert(stp,stp->bp); /* Add to the state table */ buildshifts(lemp,stp); /* Recursively compute successor states */ @@ -784,16 +1002,30 @@ struct lemon *lemp; return stp; } +/* +** Return true if two symbols are the same. +*/ +int same_symbol(struct symbol *a, struct symbol *b) +{ + int i; + if( a==b ) return 1; + if( a->type!=MULTITERMINAL ) return 0; + if( b->type!=MULTITERMINAL ) return 0; + if( a->nsubsym!=b->nsubsym ) return 0; + for(i=0; i<a->nsubsym; i++){ + if( a->subsym[i]!=b->subsym[i] ) return 0; + } + return 1; +} + /* Construct all successor states to the given state. A "successor" ** state is any state which can be reached by a shift action. */ -PRIVATE void buildshifts(lemp,stp) -struct lemon *lemp; -struct state *stp; /* The state from which successors are computed */ +PRIVATE void buildshifts(struct lemon *lemp, struct state *stp) { - struct config *cfp; /* For looping through the config closure of "stp" */ + struct config *cfp; /* For looping thru the config closure of "stp" */ struct config *bcfp; /* For the inner loop on config closure of "stp" */ - struct config *new; /* */ + struct config *newcfg; /* */ struct symbol *sp; /* Symbol following the dot in configuration "cfp" */ struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */ struct state *newstp; /* A pointer to a successor state */ @@ -816,10 +1048,10 @@ struct state *stp; /* The state from which successors are computed */ if( bcfp->status==COMPLETE ) continue; /* Already used */ if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */ bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */ - if( bsp!=sp ) continue; /* Must be same as for "cfp" */ + if( !same_symbol(bsp,sp) ) continue; /* Must be same as for "cfp" */ bcfp->status = COMPLETE; /* Mark this config as used */ - new = Configlist_addbasis(bcfp->rp,bcfp->dot+1); - Plink_add(&new->bplp,bcfp); + newcfg = Configlist_addbasis(bcfp->rp,bcfp->dot+1); + Plink_add(&newcfg->bplp,bcfp); } /* Get a pointer to the state described by the basis configuration set @@ -828,15 +1060,21 @@ struct state *stp; /* The state from which successors are computed */ /* The state "newstp" is reached from the state "stp" by a shift action ** on the symbol "sp" */ - Action_add(&stp->ap,SHIFT,sp,newstp); + if( sp->type==MULTITERMINAL ){ + int i; + for(i=0; i<sp->nsubsym; i++){ + Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp); + } + }else{ + Action_add(&stp->ap,SHIFT,sp,(char *)newstp); + } } } /* ** Construct the propagation links */ -void FindLinks(lemp) -struct lemon *lemp; +void FindLinks(struct lemon *lemp) { int i; struct config *cfp, *other; @@ -848,7 +1086,7 @@ struct lemon *lemp; ** which the link is attached. */ for(i=0; i<lemp->nstate; i++){ stp = lemp->sorted[i]; - for(cfp=stp->cfp; cfp; cfp=cfp->next){ + for(cfp=stp?stp->cfp:0; cfp; cfp=cfp->next){ cfp->stp = stp; } } @@ -857,7 +1095,7 @@ struct lemon *lemp; ** links are used in the follow-set computation. */ for(i=0; i<lemp->nstate; i++){ stp = lemp->sorted[i]; - for(cfp=stp->cfp; cfp; cfp=cfp->next){ + for(cfp=stp?stp->cfp:0; cfp; cfp=cfp->next){ for(plp=cfp->bplp; plp; plp=plp->next){ other = plp->cfp; Plink_add(&other->fplp,cfp); @@ -871,19 +1109,17 @@ struct lemon *lemp; ** A followset is the set of all symbols which can come immediately ** after a configuration. */ -void FindFollowSets(lemp) -struct lemon *lemp; +void FindFollowSets(struct lemon *lemp) { int i; struct config *cfp; - struct state *stp; struct plink *plp; int progress; int change; for(i=0; i<lemp->nstate; i++){ - stp = lemp->sorted[i]; - for(cfp=stp->cfp; cfp; cfp=cfp->next){ + assert( lemp->sorted[i]!=0 ); + for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ cfp->status = INCOMPLETE; } } @@ -891,31 +1127,31 @@ struct lemon *lemp; do{ progress = 0; for(i=0; i<lemp->nstate; i++){ - stp = lemp->sorted[i]; - for(cfp=stp->cfp; cfp; cfp=cfp->next){ + assert( lemp->sorted[i]!=0 ); + for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ if( cfp->status==COMPLETE ) continue; for(plp=cfp->fplp; plp; plp=plp->next){ change = SetUnion(plp->cfp->fws,cfp->fws); if( change ){ plp->cfp->status = INCOMPLETE; progress = 1; - } - } + } + } cfp->status = COMPLETE; } } }while( progress ); } -static int resolve_conflict(); +static int resolve_conflict(struct action *,struct action *); /* Compute the reduce actions, and resolve conflicts. */ -void FindActions(lemp) -struct lemon *lemp; +void FindActions(struct lemon *lemp) { int i,j; struct config *cfp; + struct state *stp; struct symbol *sp; struct rule *rp; @@ -924,7 +1160,6 @@ struct lemon *lemp; ** a configuration which has its dot at the extreme right. */ for(i=0; i<lemp->nstate; i++){ /* Loop over all states */ - struct state *stp; stp = lemp->sorted[i]; for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */ if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */ @@ -932,9 +1167,9 @@ struct lemon *lemp; if( SetFind(cfp->fws,j) ){ /* Add a reduce action to the state "stp" which will reduce by the ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */ - Action_add(&stp->ap,REDUCE,lemp->symbols[j],cfp->rp); + Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp); } - } + } } } } @@ -942,41 +1177,43 @@ struct lemon *lemp; /* Add the accepting token */ if( lemp->start ){ sp = Symbol_find(lemp->start); - if( sp==0 ) sp = lemp->rule->lhs; + if( sp==0 ){ + if( lemp->startRule==0 ){ + fprintf(stderr, "internal error on source line %d: no start rule\n", + __LINE__); + exit(1); + } + sp = lemp->startRule->lhs; + } }else{ - sp = lemp->rule->lhs; + sp = lemp->startRule->lhs; } /* Add to the first state (which is always the starting state of the ** finite state machine) an action to ACCEPT if the lookahead is the ** start nonterminal. */ - if (lemp->nstate) { /*(should always be true)*/ - struct state *stp; - stp = lemp->sorted[0]; - Action_add(&stp->ap,ACCEPT,sp,0); - } + Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); /* Resolve conflicts */ for(i=0; i<lemp->nstate; i++){ struct action *ap, *nap; - struct state *stp; stp = lemp->sorted[i]; - assert( stp->ap ); + /* assert( stp->ap ); */ stp->ap = Action_sort(stp->ap); for(ap=stp->ap; ap && ap->next; ap=ap->next){ for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){ /* The two actions "ap" and "nap" have the same lookahead. ** Figure out which one should be used */ - lemp->nconflict += resolve_conflict(ap,nap,lemp->errsym); + lemp->nconflict += resolve_conflict(ap,nap); } } } /* Report an error for each rule that can never be reduced. */ - for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = Bo_FALSE; + for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE; for(i=0; i<lemp->nstate; i++){ struct action *ap; for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ - if( ap->type==REDUCE ) ap->x.rp->canReduce = Bo_TRUE; + if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE; } } for(rp=lemp->rule; rp; rp=rp->next){ @@ -987,7 +1224,7 @@ struct lemon *lemp; } /* Resolve a conflict between the two given actions. If the -** conflict can't be resolve, return non-zero. +** conflict can't be resolved, return non-zero. ** ** NO LONGER TRUE: ** To resolve a conflict, first look to see if either action @@ -999,23 +1236,25 @@ struct lemon *lemp; ** If either action is a SHIFT, then it must be apx. This ** function won't work if apx->type==REDUCE and apy->type==SHIFT. */ -static int resolve_conflict(apx,apy,errsym) -struct action *apx; -struct action *apy; -struct symbol *errsym; /* The error symbol (if defined. NULL otherwise) */ -{ +static int resolve_conflict( + struct action *apx, + struct action *apy +){ struct symbol *spx, *spy; int errcnt = 0; - UNUSED(errsym); assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */ + if( apx->type==SHIFT && apy->type==SHIFT ){ + apy->type = SSCONFLICT; + errcnt++; + } if( apx->type==SHIFT && apy->type==REDUCE ){ spx = apx->sp; spy = apy->x.rp->precsym; if( spy==0 || spx->prec<0 || spy->prec<0 ){ /* Not enough precedence information. */ - apy->type = CONFLICT; + apy->type = SRCONFLICT; errcnt++; - }else if( spx->prec>spy->prec ){ /* Lower precedence wins */ + }else if( spx->prec>spy->prec ){ /* higher precedence wins */ apy->type = RD_RESOLVED; }else if( spx->prec<spy->prec ){ apx->type = SH_RESOLVED; @@ -1025,15 +1264,14 @@ struct symbol *errsym; /* The error symbol (if defined. NULL otherwise) */ apx->type = SH_RESOLVED; }else{ assert( spx->prec==spy->prec && spx->assoc==NONE ); - apy->type = CONFLICT; - errcnt++; + apx->type = ERROR; } }else if( apx->type==REDUCE && apy->type==REDUCE ){ spx = apx->x.rp->precsym; spy = apy->x.rp->precsym; if( spx==0 || spy==0 || spx->prec<0 || spy->prec<0 || spx->prec==spy->prec ){ - apy->type = CONFLICT; + apy->type = RRCONFLICT; errcnt++; }else if( spx->prec>spy->prec ){ apy->type = RD_RESOLVED; @@ -1044,10 +1282,14 @@ struct symbol *errsym; /* The error symbol (if defined. NULL otherwise) */ assert( apx->type==SH_RESOLVED || apx->type==RD_RESOLVED || - apx->type==CONFLICT || + apx->type==SSCONFLICT || + apx->type==SRCONFLICT || + apx->type==RRCONFLICT || apy->type==SH_RESOLVED || apy->type==RD_RESOLVED || - apy->type==CONFLICT + apy->type==SSCONFLICT || + apy->type==SRCONFLICT || + apy->type==RRCONFLICT ); /* The REDUCE/SHIFT case cannot happen because SHIFTs come before ** REDUCEs on the list. If we reach this point it must be because @@ -1068,34 +1310,19 @@ static struct config *basis = 0; /* Top of list of basis configs */ static struct config **basisend = 0; /* End of list of basis configs */ /* Return a pointer to a new configuration */ -PRIVATE struct config *newconfig(){ - struct config *new; - if( freelist==0 ){ - int i; - int amt = 3; - freelist = (struct config *)malloc( sizeof(struct config)*amt ); - if( freelist==0 ){ - fprintf(stderr,"Unable to allocate memory for a new configuration."); - exit(1); - } - for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1]; - freelist[amt-1].next = 0; - } - new = freelist; - freelist = freelist->next; - return new; +PRIVATE struct config *newconfig(void){ + return (struct config*)calloc(1, sizeof(struct config)); } /* The configuration "old" is no longer used */ -PRIVATE void deleteconfig(old) -struct config *old; +PRIVATE void deleteconfig(struct config *old) { old->next = freelist; freelist = old; } /* Initialized the configuration list builder */ -void Configlist_init(){ +void Configlist_init(void){ current = 0; currentend = ¤t; basis = 0; @@ -1105,7 +1332,7 @@ void Configlist_init(){ } /* Initialized the configuration list builder */ -void Configlist_reset(){ +void Configlist_reset(void){ current = 0; currentend = ¤t; basis = 0; @@ -1115,10 +1342,10 @@ void Configlist_reset(){ } /* Add another configuration to the configuration list */ -struct config *Configlist_add(rp,dot) -struct rule *rp; /* The rule */ -int dot; /* Index into the RHS of the rule where the dot goes */ -{ +struct config *Configlist_add( + struct rule *rp, /* The rule */ + int dot /* Index into the RHS of the rule where the dot goes */ +){ struct config *cfp, model; assert( currentend!=0 ); @@ -1142,9 +1369,7 @@ int dot; /* Index into the RHS of the rule where the dot goes */ } /* Add a basis configuration to the configuration list */ -struct config *Configlist_addbasis(rp,dot) -struct rule *rp; -int dot; +struct config *Configlist_addbasis(struct rule *rp, int dot) { struct config *cfp, model; @@ -1172,8 +1397,7 @@ int dot; } /* Compute the closure of the configuration list */ -void Configlist_closure(lemp) -struct lemon *lemp; +void Configlist_closure(struct lemon *lemp) { struct config *cfp, *newcfp; struct rule *rp, *newrp; @@ -1199,11 +1423,17 @@ struct lemon *lemp; if( xsp->type==TERMINAL ){ SetAdd(newcfp->fws,xsp->index); break; - }else{ + }else if( xsp->type==MULTITERMINAL ){ + int k; + for(k=0; k<xsp->nsubsym; k++){ + SetAdd(newcfp->fws, xsp->subsym[k]->index); + } + break; + }else{ SetUnion(newcfp->fws,xsp->firstset); - if( xsp->lambda==Bo_FALSE ) break; - } - } + if( xsp->lambda==LEMON_FALSE ) break; + } + } if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp); } } @@ -1212,22 +1442,24 @@ struct lemon *lemp; } /* Sort the configuration list */ -void Configlist_sort(){ - current = (struct config *)msort(current,(void **)&(current->next),Configcmp); +void Configlist_sort(void){ + current = (struct config*)msort((char*)current,(char**)&(current->next), + Configcmp); currentend = 0; return; } /* Sort the basis configuration list */ -void Configlist_sortbasis(){ - basis = (struct config *)msort(current,(void **)&(current->bp),Configcmp); +void Configlist_sortbasis(void){ + basis = (struct config*)msort((char*)current,(char**)&(current->bp), + Configcmp); basisend = 0; return; } /* Return a pointer to the head of the configuration list and ** reset the list */ -struct config *Configlist_return(){ +struct config *Configlist_return(void){ struct config *old; old = current; current = 0; @@ -1237,7 +1469,7 @@ struct config *Configlist_return(){ /* Return a pointer to the head of the configuration list and ** reset the list */ -struct config *Configlist_basis(){ +struct config *Configlist_basis(void){ struct config *old; old = basis; basis = 0; @@ -1246,8 +1478,7 @@ struct config *Configlist_basis(){ } /* Free all elements of the given configuration list */ -void Configlist_eat(cfp) -struct config *cfp; +void Configlist_eat(struct config *cfp) { struct config *nextcfp; for(; cfp; cfp=nextcfp){ @@ -1264,72 +1495,13 @@ struct config *cfp; ** Code for printing error message. */ -/* Find a good place to break "msg" so that its length is at least "min" -** but no more than "max". Make the point as close to max as possible. -*/ -static int findbreak(msg,min,max) -char *msg; -int min; -int max; -{ - int i,spot; - char c; - for(i=spot=min; i<=max; i++){ - c = msg[i]; - if( c=='\t' ) msg[i] = ' '; - if( c=='\n' ){ msg[i] = ' '; spot = i; break; } - if( c==0 ){ spot = i; break; } - if( c=='-' && i<max-1 ) spot = i+1; - if( c==' ' ) spot = i; - } - return spot; -} - -/* -** The error message is split across multiple lines if necessary. The -** splits occur at a space, if there is a space available near the end -** of the line. -*/ -#define ERRMSGSIZE 10000 /* Hope this is big enough. No way to error check */ -#define LINEWIDTH 79 /* Max width of any output line */ -#define PREFIXLIMIT 30 /* Max width of the prefix on each line */ void ErrorMsg(const char *filename, int lineno, const char *format, ...){ - char errmsg[ERRMSGSIZE]; - char prefix[PREFIXLIMIT+10]; - int errmsgsize; - int prefixsize; - int availablewidth; va_list ap; - int end, restart, base; - + fprintf(stderr, "%s:%d: ", filename, lineno); va_start(ap, format); - /* Prepare a prefix to be prepended to every output line */ - if( lineno>0 ){ - sprintf(prefix,"%.*s:%d: ",PREFIXLIMIT-10,filename,lineno); - }else{ - sprintf(prefix,"%.*s: ",PREFIXLIMIT-10,filename); - } - prefixsize = strlen(prefix); - availablewidth = LINEWIDTH - prefixsize; - - /* Generate the error message */ - vsprintf(errmsg,format,ap); + vfprintf(stderr,format,ap); va_end(ap); - errmsgsize = strlen(errmsg); - /* Remove trailing '\n's from the error message. */ - while( errmsgsize>0 && errmsg[errmsgsize-1]=='\n' ){ - errmsg[--errmsgsize] = 0; - } - - /* Print the error message */ - base = 0; - while( errmsg[base]!=0 ){ - end = restart = findbreak(&errmsg[base],0,availablewidth); - restart += base; - while( errmsg[restart]==' ' ) restart++; - fprintf(stdout,"%s%.*s\n",prefix,end,&errmsg[base]); - base = restart; - } + fprintf(stderr, "\n"); } /**************** From the file "main.c" ************************************/ /* @@ -1339,17 +1511,119 @@ void ErrorMsg(const char *filename, int lineno, const char *format, ...){ /* Report an out-of-memory condition and abort. This function ** is used mostly by the "MemoryCheck" macro in struct.h */ -void memory_error() { +void memory_error(void){ fprintf(stderr,"Out of memory. Aborting...\n"); exit(1); } -static const char* out_dir = "."; +static int nDefine = 0; /* Number of -D options on the command line */ +static char **azDefine = 0; /* Name of the -D macros */ + +/* This routine is called with the argument to each -D command-line option. +** Add the macro defined to the azDefine array. +*/ +static void handle_D_option(char *z){ + char **paz; + nDefine++; + azDefine = (char **) realloc(azDefine, sizeof(azDefine[0])*nDefine); + if( azDefine==0 ){ + fprintf(stderr,"out of memory\n"); + exit(1); + } + paz = &azDefine[nDefine-1]; + *paz = (char *) malloc( lemonStrlen(z)+1 ); + if( *paz==0 ){ + fprintf(stderr,"out of memory\n"); + exit(1); + } + lemon_strcpy(*paz, z); + for(z=*paz; *z && *z!='='; z++){} + *z = 0; +} + +/* Rember the name of the output directory +*/ +static char *outputDir = NULL; +static void handle_d_option(char *z){ + outputDir = (char *) malloc( lemonStrlen(z)+1 ); + if( outputDir==0 ){ + fprintf(stderr,"out of memory\n"); + exit(1); + } + lemon_strcpy(outputDir, z); +} + +static char *user_templatename = NULL; +static void handle_T_option(char *z){ + user_templatename = (char *) malloc( lemonStrlen(z)+1 ); + if( user_templatename==0 ){ + memory_error(); + } + lemon_strcpy(user_templatename, z); +} + +/* Merge together to lists of rules ordered by rule.iRule */ +static struct rule *Rule_merge(struct rule *pA, struct rule *pB){ + struct rule *pFirst = 0; + struct rule **ppPrev = &pFirst; + while( pA && pB ){ + if( pA->iRule<pB->iRule ){ + *ppPrev = pA; + ppPrev = &pA->next; + pA = pA->next; + }else{ + *ppPrev = pB; + ppPrev = &pB->next; + pB = pB->next; + } + } + if( pA ){ + *ppPrev = pA; + }else{ + *ppPrev = pB; + } + return pFirst; +} + +/* +** Sort a list of rules in order of increasing iRule value +*/ +static struct rule *Rule_sort(struct rule *rp){ + unsigned int i; + struct rule *pNext; + struct rule *x[32]; + memset(x, 0, sizeof(x)); + while( rp ){ + pNext = rp->next; + rp->next = 0; + for(i=0; i<sizeof(x)/sizeof(x[0])-1 && x[i]; i++){ + rp = Rule_merge(x[i], rp); + x[i] = 0; + } + x[i] = rp; + rp = pNext; + } + rp = 0; + for(i=0; i<sizeof(x)/sizeof(x[0]); i++){ + rp = Rule_merge(x[i], rp); + } + return rp; +} + +/* forward reference */ +static const char *minimum_size_type(int lwr, int upr, int *pnByte); + +/* Print a single line of the "Parser Stats" output +*/ +static void stats_line(const char *zLabel, int iValue){ + int nLabel = lemonStrlen(zLabel); + printf(" %s%.*s %5d\n", zLabel, + 35-nLabel, "................................", + iValue); +} + /* The main program. Parse the command line and do it... */ -int main(argc,argv) -int argc; -char **argv; -{ +int main(int argc, char **argv){ static int version = 0; static int rpflag = 0; static int basisflag = 0; @@ -1357,31 +1631,52 @@ char **argv; static int quiet = 0; static int statistics = 0; static int mhflag = 0; + static int nolinenosflag = 0; + static int noResort = 0; + static int sqlFlag = 0; + static int printPP = 0; + static struct s_options options[] = { {OPT_FLAG, "b", (char*)&basisflag, "Print only the basis in report."}, {OPT_FLAG, "c", (char*)&compress, "Don't compress the action table."}, + {OPT_FSTR, "d", (char*)&handle_d_option, "Output directory. Default '.'"}, + {OPT_FSTR, "D", (char*)handle_D_option, "Define an %ifdef macro."}, + {OPT_FLAG, "E", (char*)&printPP, "Print input file after preprocessing."}, + {OPT_FSTR, "f", 0, "Ignored. (Placeholder for -f compiler options.)"}, {OPT_FLAG, "g", (char*)&rpflag, "Print grammar without actions."}, - {OPT_FLAG, "m", (char*)&mhflag, "Output a makeheaders compatible file"}, + {OPT_FSTR, "I", 0, "Ignored. (Placeholder for '-I' compiler options.)"}, + {OPT_FLAG, "m", (char*)&mhflag, "Output a makeheaders compatible file."}, + {OPT_FLAG, "l", (char*)&nolinenosflag, "Do not print #line statements."}, + {OPT_FSTR, "O", 0, "Ignored. (Placeholder for '-O' compiler options.)"}, + {OPT_FLAG, "p", (char*)&showPrecedenceConflict, + "Show conflicts resolved by precedence rules"}, {OPT_FLAG, "q", (char*)&quiet, "(Quiet) Don't print the report file."}, - {OPT_FLAG, "s", (char*)&statistics, "Print parser stats to standard output."}, + {OPT_FLAG, "r", (char*)&noResort, "Do not sort or renumber states"}, + {OPT_FLAG, "s", (char*)&statistics, + "Print parser stats to standard output."}, + {OPT_FLAG, "S", (char*)&sqlFlag, + "Generate the *.sql file describing the parser tables."}, {OPT_FLAG, "x", (char*)&version, "Print the version number."}, - {OPT_STR, "o", (char*)&out_dir, "Customize output directory."}, + {OPT_FSTR, "T", (char*)handle_T_option, "Specify a template file."}, + {OPT_FSTR, "W", 0, "Ignored. (Placeholder for '-W' compiler options.)"}, {OPT_FLAG,0,0,0} }; int i; + int exitcode; struct lemon lem; - char *def_tmpl_name = "lempar.c"; + struct rule *rp; - UNUSED(argc); + (void)argc; OptInit(argv,options,stderr); if( version ){ printf("Lemon version 1.0\n"); exit(0); } - if( OptNArgs() < 1 ){ + if( OptNArgs()!=1 ){ fprintf(stderr,"Exactly one filename argument is required.\n"); exit(1); } + memset(&lem, 0, sizeof(lem)); lem.errorcnt = 0; /* Initialize the machine */ @@ -1390,46 +1685,53 @@ char **argv; State_init(); lem.argv0 = argv[0]; lem.filename = OptArg(0); - lem.tmplname = (OptNArgs() == 2) ? OptArg(1) : def_tmpl_name; lem.basisflag = basisflag; - lem.has_fallback = 0; - lem.nconflict = 0; - lem.name = lem.include = lem.arg = lem.tokentype = lem.start = 0; - lem.vartype = 0; - lem.stacksize = 0; - lem.error = lem.overflow = lem.failure = lem.accept = lem.tokendest = - lem.tokenprefix = lem.outname = lem.extracode = 0; - lem.vardest = 0; - lem.tablesize = 0; + lem.nolinenosflag = nolinenosflag; + lem.printPreprocessed = printPP; Symbol_new("$"); - lem.errsym = Symbol_new("error"); /* Parse the input file */ Parse(&lem); - if( lem.errorcnt ) exit(lem.errorcnt); - if( lem.rule==0 ){ + if( lem.printPreprocessed || lem.errorcnt ) exit(lem.errorcnt); + if( lem.nrule==0 ){ fprintf(stderr,"Empty grammar.\n"); exit(1); } + lem.errsym = Symbol_find("error"); /* Count and index the symbols of the grammar */ Symbol_new("{default}"); lem.nsymbol = Symbol_count(); lem.symbols = Symbol_arrayof(); for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i; - qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), - (int(*)())Symbolcmpp); + qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp); for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i; - for(i=1; i<lem.nsymbol && isupper(lem.symbols[i]->name[0]); i++); - lem.nsymbol--; /*(do not count "{default}")*/ + while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; } + assert( strcmp(lem.symbols[i-1]->name,"{default}")==0 ); + lem.nsymbol = i - 1; + for(i=1; ISUPPER(lem.symbols[i]->name[0]); i++); lem.nterminal = i; + /* Assign sequential rule numbers. Start with 0. Put rules that have no + ** reduce action C-code associated with them last, so that the switch() + ** statement that selects reduction actions will have a smaller jump table. + */ + for(i=0, rp=lem.rule; rp; rp=rp->next){ + rp->iRule = rp->code ? i++ : -1; + } + lem.nruleWithAction = i; + for(rp=lem.rule; rp; rp=rp->next){ + if( rp->iRule<0 ) rp->iRule = i++; + } + lem.startRule = lem.rule; + lem.rule = Rule_sort(lem.rule); + /* Generate a reprint of the grammar, if requested on the command line */ if( rpflag ){ Reprint(&lem); }else{ /* Initialize the size for all follow and first sets */ - SetSize(lem.nterminal); + SetSize(lem.nterminal+1); /* Find the precedence for every production rule (that has one) */ FindRulePrecedences(&lem); @@ -1442,7 +1744,6 @@ char **argv; ** links so that the follow-set can be computed later */ lem.nstate = 0; FindStates(&lem); - lem.nstate = State_count(); lem.sorted = State_arrayof(); /* Tie up loose ends on the propagation links */ @@ -1457,11 +1758,16 @@ char **argv; /* Compress the action tables */ if( compress==0 ) CompressTables(&lem); + /* Reorder and renumber the states so that states with fewer choices + ** occur at the end. This is an optimization that helps make the + ** generated parser tables smaller. */ + if( noResort==0 ) ResortStates(&lem); + /* Generate a report of the parser generated. (the "y.output" file) */ if( !quiet ) ReportOutput(&lem); /* Generate the source code for the parser */ - ReportTable(&lem, mhflag); + ReportTable(&lem, mhflag, sqlFlag); /* Produce a header file for use by the scanner. (This step is ** omitted if the "-m" option is used because makeheaders will @@ -1469,15 +1775,25 @@ char **argv; if( !mhflag ) ReportHeader(&lem); } if( statistics ){ - printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n", - lem.nterminal, lem.nsymbol - lem.nterminal, lem.nrule); - printf(" %d states, %d parser table entries, %d conflicts\n", - lem.nstate, lem.tablesize, lem.nconflict); - } - if( lem.nconflict ){ + printf("Parser statistics:\n"); + stats_line("terminal symbols", lem.nterminal); + stats_line("non-terminal symbols", lem.nsymbol - lem.nterminal); + stats_line("total symbols", lem.nsymbol); + stats_line("rules", lem.nrule); + stats_line("states", lem.nxstate); + stats_line("conflicts", lem.nconflict); + stats_line("action table entries", lem.nactiontab); + stats_line("lookahead table entries", lem.nlookaheadtab); + stats_line("total table size (bytes)", lem.tablesize); + } + if( lem.nconflict > 0 ){ fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); } - exit(lem.errorcnt + lem.nconflict); + + /* return 0 on success, 1 on failure. */ + exitcode = ((lem.errorcnt > 0) || (lem.nconflict > 0)) ? 1 : 0; + exit(exitcode); + return (exitcode); } /******************** From the file "msort.c" *******************************/ /* @@ -1506,7 +1822,7 @@ char **argv; /* ** Return a pointer to the next structure in the linked list. */ -#define NEXT(A) (*(char**)(((unsigned long)A)+offset)) +#define NEXT(A) (*(char**)(((char*)A)+offset)) /* ** Inputs: @@ -1523,12 +1839,12 @@ char **argv; ** The "next" pointers for elements in the lists a and b are ** changed. */ -static char *merge(a,b,cmp,offset) -char *a; -char *b; -int (*cmp)(); -int offset; -{ +static char *merge( + char *a, + char *b, + int (*cmp)(const char*,const char*), + int offset +){ char *ptr, *head; if( a==0 ){ @@ -1536,7 +1852,7 @@ int offset; }else if( b==0 ){ head = a; }else{ - if( (*cmp)(a,b)<0 ){ + if( (*cmp)(a,b)<=0 ){ ptr = a; a = NEXT(a); }else{ @@ -1545,7 +1861,7 @@ int offset; } head = ptr; while( a && b ){ - if( (*cmp)(a,b)<0 ){ + if( (*cmp)(a,b)<=0 ){ NEXT(ptr) = a; ptr = a; a = NEXT(a); @@ -1575,13 +1891,16 @@ int offset; ** The "next" pointers for elements in list are changed. */ #define LISTSIZE 30 -void *msort(void *list, void **next, int(*cmp)(void *, void *)) -{ +static char *msort( + char *list, + char **next, + int (*cmp)(const char*,const char*) +){ unsigned long offset; char *ep; char *set[LISTSIZE]; int i; - offset = (unsigned long)next - (unsigned long)list; + offset = (unsigned long)((char*)next - (char*)list); for(i=0; i<LISTSIZE; i++) set[i] = 0; while( list ){ ep = list; @@ -1594,11 +1913,11 @@ void *msort(void *list, void **next, int(*cmp)(void *, void *)) set[i] = ep; } ep = 0; - for(i=0; i<LISTSIZE; i++) if( set[i] ) ep = merge(ep,set[i],cmp,offset); + for(i=0; i<LISTSIZE; i++) if( set[i] ) ep = merge(set[i],ep,cmp,offset); return ep; } /************************ From the file "option.c" **************************/ -static char **argv; +static char **g_argv; static struct s_options *op; static FILE *errstream; @@ -1608,22 +1927,21 @@ static FILE *errstream; ** Print the command line with a carrot pointing to the k-th character ** of the n-th field. */ -static void errline(n,k,err) -int n; -int k; -FILE *err; +static void errline(int n, int k, FILE *err) { - int spcnt = 0, i; - if( argv[0] ) { - fprintf(err,"%s",argv[0]); - spcnt += strlen(argv[0]) + 1; + int spcnt, i; + if( g_argv[0] ){ + fprintf(err,"%s",g_argv[0]); + spcnt = lemonStrlen(g_argv[0]) + 1; + }else{ + spcnt = 0; } - for(i=1; i<n && argv[i]; i++){ - fprintf(err," %s",argv[i]); - spcnt += strlen(argv[i]) + 1; + for(i=1; i<n && g_argv[i]; i++){ + fprintf(err," %s",g_argv[i]); + spcnt += lemonStrlen(g_argv[i])+1; } spcnt += k; - for(; argv[i]; i++) fprintf(err," %s",argv[i]); + for(; g_argv[i]; i++) fprintf(err," %s",g_argv[i]); if( spcnt<20 ){ fprintf(err,"\n%*s^-- here\n",spcnt,""); }else{ @@ -1635,18 +1953,17 @@ FILE *err; ** Return the index of the N-th non-switch argument. Return -1 ** if N is out of range. */ -static int argindex(n) -int n; +static int argindex(int n) { int i; int dashdash = 0; - if( argv!=0 && *argv!=0 ){ - for(i=1; argv[i]; i++){ - if( dashdash || !ISOPT(argv[i]) ){ + if( g_argv!=0 && *g_argv!=0 ){ + for(i=1; g_argv[i]; i++){ + if( dashdash || !ISOPT(g_argv[i]) ){ if( n==0 ) return i; n--; } - if( strcmp(argv[i],"--")==0 ) dashdash = 1; + if( strcmp(g_argv[i],"--")==0 ) dashdash = 1; } } return -1; @@ -1657,27 +1974,29 @@ static char emsg[] = "Command line syntax error: "; /* ** Process a flag command line argument. */ -static int handleflags(i,err) -int i; -FILE *err; +static int handleflags(int i, FILE *err) { int v; int errcnt = 0; int j; for(j=0; op[j].label; j++){ - if( strcmp(&argv[i][1],op[j].label)==0 ) break; + if( strncmp(&g_argv[i][1],op[j].label,lemonStrlen(op[j].label))==0 ) break; } - v = argv[i][0]=='-' ? 1 : 0; + v = g_argv[i][0]=='-' ? 1 : 0; if( op[j].label==0 ){ if( err ){ fprintf(err,"%sundefined option.\n",emsg); errline(i,1,err); } errcnt++; + }else if( op[j].arg==0 ){ + /* Ignore this option */ }else if( op[j].type==OPT_FLAG ){ *((int*)op[j].arg) = v; }else if( op[j].type==OPT_FFLAG ){ - (*(void(*)())(intptr_t)(op[j].arg))(v); + (*(void(*)(int))(op[j].arg))(v); + }else if( op[j].type==OPT_FSTR ){ + (*(void(*)(char *))(op[j].arg))(&g_argv[i][2]); }else{ if( err ){ fprintf(err,"%smissing argument on switch.\n",emsg); @@ -1691,9 +2010,7 @@ FILE *err; /* ** Process a command line switch which has an argument. */ -static int handleswitch(i,err) -int i; -FILE *err; +static int handleswitch(int i, FILE *err) { int lv = 0; double dv = 0.0; @@ -1701,11 +2018,11 @@ FILE *err; char *cp; int j; int errcnt = 0; - cp = strchr(argv[i],'='); - if( NULL == cp ) return 1; /*(should not happen; checked by caller)*/ + cp = strchr(g_argv[i],'='); + assert( cp!=0 ); *cp = 0; for(j=0; op[j].label; j++){ - if( strcmp(argv[i],op[j].label)==0 ) break; + if( strcmp(g_argv[i],op[j].label)==0 ) break; } *cp = '='; if( op[j].label==0 ){ @@ -1730,8 +2047,9 @@ FILE *err; dv = strtod(cp,&end); if( *end ){ if( err ){ - fprintf(err,"%sillegal character in floating-point argument.\n",emsg); - errline(i,((unsigned long)end)-(unsigned long)argv[i],err); + fprintf(err, + "%sillegal character in floating-point argument.\n",emsg); + errline(i,(int)((char*)end-(char*)g_argv[i]),err); } errcnt++; } @@ -1742,7 +2060,7 @@ FILE *err; if( *end ){ if( err ){ fprintf(err,"%sillegal character in integer argument.\n",emsg); - errline(i,((unsigned long)end)-(unsigned long)argv[i],err); + errline(i,(int)((char*)end-(char*)g_argv[i]),err); } errcnt++; } @@ -1760,40 +2078,37 @@ FILE *err; *(double*)(op[j].arg) = dv; break; case OPT_FDBL: - (*(void(*)())(intptr_t)(op[j].arg))(dv); + (*(void(*)(double))(op[j].arg))(dv); break; case OPT_INT: *(int*)(op[j].arg) = lv; break; case OPT_FINT: - (*(void(*)())(intptr_t)(op[j].arg))((int)lv); + (*(void(*)(int))(op[j].arg))((int)lv); break; case OPT_STR: *(char**)(op[j].arg) = sv; break; case OPT_FSTR: - (*(void(*)())(intptr_t)(op[j].arg))(sv); + (*(void(*)(char *))(op[j].arg))(sv); break; } } return errcnt; } -int OptInit(a,o,err) -char **a; -struct s_options *o; -FILE *err; +int OptInit(char **a, struct s_options *o, FILE *err) { int errcnt = 0; - argv = a; + g_argv = a; op = o; errstream = err; - if( argv && *argv && op ){ + if( g_argv && *g_argv && op ){ int i; - for(i=1; argv[i]; i++){ - if( argv[i][0]=='+' || argv[i][0]=='-' ){ + for(i=1; g_argv[i]; i++){ + if( g_argv[i][0]=='+' || g_argv[i][0]=='-' ){ errcnt += handleflags(i,err); - }else if( strchr(argv[i],'=') ){ + }else if( strchr(g_argv[i],'=') ){ errcnt += handleswitch(i,err); } } @@ -1806,41 +2121,39 @@ FILE *err; return 0; } -int OptNArgs(){ +int OptNArgs(void){ int cnt = 0; int dashdash = 0; int i; - if( argv!=0 && argv[0]!=0 ){ - for(i=1; argv[i]; i++){ - if( dashdash || !ISOPT(argv[i]) ) cnt++; - if( strcmp(argv[i],"--")==0 ) dashdash = 1; + if( g_argv!=0 && g_argv[0]!=0 ){ + for(i=1; g_argv[i]; i++){ + if( dashdash || !ISOPT(g_argv[i]) ) cnt++; + if( strcmp(g_argv[i],"--")==0 ) dashdash = 1; } } return cnt; } -char *OptArg(n) -int n; +char *OptArg(int n) { int i; i = argindex(n); - return i>=0 ? argv[i] : 0; + return i>=0 ? g_argv[i] : 0; } -void OptErr(n) -int n; +void OptErr(int n) { int i; i = argindex(n); if( i>=0 ) errline(i,0,errstream); } -void OptPrint(){ +void OptPrint(void){ int i; int max, len; max = 0; for(i=0; op[i].label; i++){ - len = strlen(op[i].label) + 1; + len = lemonStrlen(op[i].label) + 1; switch( op[i].type ){ case OPT_FLAG: case OPT_FFLAG: @@ -1868,18 +2181,18 @@ void OptPrint(){ break; case OPT_INT: case OPT_FINT: - fprintf(errstream," %s=<integer>%*s %s\n",op[i].label, - (int)(max-strlen(op[i].label)-9),"",op[i].message); + fprintf(errstream," -%s<integer>%*s %s\n",op[i].label, + (int)(max-lemonStrlen(op[i].label)-9),"",op[i].message); break; case OPT_DBL: case OPT_FDBL: - fprintf(errstream," %s=<real>%*s %s\n",op[i].label, - (int)(max-strlen(op[i].label)-6),"",op[i].message); + fprintf(errstream," -%s<real>%*s %s\n",op[i].label, + (int)(max-lemonStrlen(op[i].label)-6),"",op[i].message); break; case OPT_STR: case OPT_FSTR: - fprintf(errstream," %s=<string>%*s %s\n",op[i].label, - (int)(max-strlen(op[i].label)-8),"",op[i].message); + fprintf(errstream," -%s<string>%*s %s\n",op[i].label, + (int)(max-lemonStrlen(op[i].label)-8),"",op[i].message); break; } } @@ -1890,43 +2203,50 @@ void OptPrint(){ */ /* The state of the parser */ +enum e_state { + INITIALIZE, + WAITING_FOR_DECL_OR_RULE, + WAITING_FOR_DECL_KEYWORD, + WAITING_FOR_DECL_ARG, + WAITING_FOR_PRECEDENCE_SYMBOL, + WAITING_FOR_ARROW, + IN_RHS, + LHS_ALIAS_1, + LHS_ALIAS_2, + LHS_ALIAS_3, + RHS_ALIAS_1, + RHS_ALIAS_2, + PRECEDENCE_MARK_1, + PRECEDENCE_MARK_2, + RESYNC_AFTER_RULE_ERROR, + RESYNC_AFTER_DECL_ERROR, + WAITING_FOR_DESTRUCTOR_SYMBOL, + WAITING_FOR_DATATYPE_SYMBOL, + WAITING_FOR_FALLBACK_ID, + WAITING_FOR_WILDCARD_ID, + WAITING_FOR_CLASS_ID, + WAITING_FOR_CLASS_TOKEN, + WAITING_FOR_TOKEN_NAME +}; struct pstate { char *filename; /* Name of the input file */ int tokenlineno; /* Linenumber at which current token starts */ int errorcnt; /* Number of errors so far */ char *tokenstart; /* Text of current token */ struct lemon *gp; /* Global state vector */ - enum e_state { - INITIALIZE, - WAITING_FOR_DECL_OR_RULE, - WAITING_FOR_DECL_KEYWORD, - WAITING_FOR_DECL_ARG, - WAITING_FOR_PRECEDENCE_SYMBOL, - WAITING_FOR_ARROW, - IN_RHS, - LHS_ALIAS_1, - LHS_ALIAS_2, - LHS_ALIAS_3, - RHS_ALIAS_1, - RHS_ALIAS_2, - PRECEDENCE_MARK_1, - PRECEDENCE_MARK_2, - RESYNC_AFTER_RULE_ERROR, - RESYNC_AFTER_DECL_ERROR, - WAITING_FOR_DESTRUCTOR_SYMBOL, - WAITING_FOR_DATATYPE_SYMBOL, - WAITING_FOR_FALLBACK_ID - } state; /* The state of the parser */ + enum e_state state; /* The state of the parser */ struct symbol *fallback; /* The fallback token */ + struct symbol *tkclass; /* Token class symbol */ struct symbol *lhs; /* Left-hand side of current rule */ - char *lhsalias; /* Alias for the LHS */ + const char *lhsalias; /* Alias for the LHS */ int nrhs; /* Number of right-hand side symbols seen */ struct symbol *rhs[MAXRHS]; /* RHS symbols */ - char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */ + const char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */ struct rule *prevrule; /* Previous rule parsed */ - char *declkeyword; /* Keyword of a declaration */ + const char *declkeyword; /* Keyword of a declaration */ char **declargslot; /* Where the declaration argument should be put */ - int *decllnslot; /* Where the declaration linenumber is put */ + int insertLineMacro; /* Add #line before declaration insert */ + int *decllinenoslot; /* Where to write declaration line number */ enum e_assoc declassoc; /* Assign this association to decl arguments */ int preccounter; /* Assign this precedence to decl arguments */ struct rule *firstrule; /* Pointer to first rule in the grammar */ @@ -1934,10 +2254,9 @@ struct pstate { }; /* Parse a single token */ -static void parseonetoken(psp) -struct pstate *psp; +static void parseonetoken(struct pstate *psp) { - char *x; + const char *x; x = Strsafe(psp->tokenstart); /* Save the token permanently */ #if 0 printf("%s:%d: Token=[%s] state=%d\n",psp->filename,psp->tokenlineno, @@ -1949,11 +2268,11 @@ struct pstate *psp; psp->preccounter = 0; psp->firstrule = psp->lastrule = 0; psp->gp->nrule = 0; - /* Fall through */ + /* fall through */ case WAITING_FOR_DECL_OR_RULE: if( x[0]=='%' ){ psp->state = WAITING_FOR_DECL_KEYWORD; - }else if( islower(x[0]) ){ + }else if( ISLOWER(x[0]) ){ psp->lhs = Symbol_new(x); psp->nrhs = 0; psp->lhsalias = 0; @@ -1961,18 +2280,21 @@ struct pstate *psp; }else if( x[0]=='{' ){ if( psp->prevrule==0 ){ ErrorMsg(psp->filename,psp->tokenlineno, -"There is not prior rule opon which to attach the code \ -fragment which begins on this line."); + "There is no prior rule upon which to attach the code " + "fragment which begins on this line."); psp->errorcnt++; - }else if( psp->prevrule->code!=0 ){ + }else if( psp->prevrule->code!=0 ){ ErrorMsg(psp->filename,psp->tokenlineno, -"Code fragment beginning on this line is not the first \ -to follow the previous rule."); + "Code fragment beginning on this line is not the first " + "to follow the previous rule."); psp->errorcnt++; + }else if( strcmp(x, "{NEVER-REDUCE")==0 ){ + psp->prevrule->neverReduce = 1; }else{ psp->prevrule->line = psp->tokenlineno; psp->prevrule->code = &x[1]; - } + psp->prevrule->noCode = 0; + } }else if( x[0]=='[' ){ psp->state = PRECEDENCE_MARK_1; }else{ @@ -1983,7 +2305,7 @@ to follow the previous rule."); } break; case PRECEDENCE_MARK_1: - if( !isupper(x[0]) ){ + if( !ISUPPER(x[0]) ){ ErrorMsg(psp->filename,psp->tokenlineno, "The precedence symbol must be a terminal."); psp->errorcnt++; @@ -1993,8 +2315,8 @@ to follow the previous rule."); psp->errorcnt++; }else if( psp->prevrule->precsym!=0 ){ ErrorMsg(psp->filename,psp->tokenlineno, -"Precedence mark on this line is not the first \ -to follow the previous rule."); + "Precedence mark on this line is not the first " + "to follow the previous rule."); psp->errorcnt++; }else{ psp->prevrule->precsym = Symbol_new(x); @@ -2023,7 +2345,7 @@ to follow the previous rule."); } break; case LHS_ALIAS_1: - if( isalpha(x[0]) ){ + if( ISALPHA(x[0]) ){ psp->lhsalias = x; psp->state = LHS_ALIAS_2; }else{ @@ -2058,26 +2380,28 @@ to follow the previous rule."); case IN_RHS: if( x[0]=='.' ){ struct rule *rp; - rp = (struct rule *)malloc( sizeof(struct rule) + - sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs ); + rp = (struct rule *)calloc( sizeof(struct rule) + + sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1); if( rp==0 ){ ErrorMsg(psp->filename,psp->tokenlineno, "Can't allocate enough memory for this rule."); psp->errorcnt++; psp->prevrule = 0; - }else{ + }else{ int i; rp->ruleline = psp->tokenlineno; rp->rhs = (struct symbol**)&rp[1]; - rp->rhsalias = (char**)&(rp->rhs[psp->nrhs]); + rp->rhsalias = (const char**)&(rp->rhs[psp->nrhs]); for(i=0; i<psp->nrhs; i++){ rp->rhs[i] = psp->rhs[i]; rp->rhsalias[i] = psp->alias[i]; - } + if( rp->rhsalias[i]!=0 ){ rp->rhs[i]->bContent = 1; } + } rp->lhs = psp->lhs; rp->lhsalias = psp->lhsalias; rp->nrhs = psp->nrhs; rp->code = 0; + rp->noCode = 1; rp->precsym = 0; rp->index = psp->gp->nrule++; rp->nextlhs = rp->lhs->rule; @@ -2085,25 +2409,47 @@ to follow the previous rule."); rp->next = 0; if( psp->firstrule==0 ){ psp->firstrule = psp->lastrule = rp; - }else{ + }else{ psp->lastrule->next = rp; psp->lastrule = rp; - } + } psp->prevrule = rp; - } + } psp->state = WAITING_FOR_DECL_OR_RULE; - }else if( isalpha(x[0]) ){ + }else if( ISALPHA(x[0]) ){ if( psp->nrhs>=MAXRHS ){ ErrorMsg(psp->filename,psp->tokenlineno, - "Too many symbol on RHS or rule beginning at \"%s\".", + "Too many symbols on RHS of rule beginning at \"%s\".", x); psp->errorcnt++; psp->state = RESYNC_AFTER_RULE_ERROR; - }else{ + }else{ psp->rhs[psp->nrhs] = Symbol_new(x); psp->alias[psp->nrhs] = 0; psp->nrhs++; - } + } + }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 && ISUPPER(x[1]) ){ + struct symbol *msp = psp->rhs[psp->nrhs-1]; + if( msp->type!=MULTITERMINAL ){ + struct symbol *origsp = msp; + msp = (struct symbol *) calloc(1,sizeof(*msp)); + memset(msp, 0, sizeof(*msp)); + msp->type = MULTITERMINAL; + msp->nsubsym = 1; + msp->subsym = (struct symbol **) calloc(1,sizeof(struct symbol*)); + msp->subsym[0] = origsp; + msp->name = origsp->name; + psp->rhs[psp->nrhs-1] = msp; + } + msp->nsubsym++; + msp->subsym = (struct symbol **) realloc(msp->subsym, + sizeof(struct symbol*)*msp->nsubsym); + msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]); + if( ISLOWER(x[1]) || ISLOWER(msp->subsym[0]->name[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Cannot form a compound containing a non-terminal"); + psp->errorcnt++; + } }else if( x[0]=='(' && psp->nrhs>0 ){ psp->state = RHS_ALIAS_1; }else{ @@ -2114,7 +2460,7 @@ to follow the previous rule."); } break; case RHS_ALIAS_1: - if( isalpha(x[0]) ){ + if( ISALPHA(x[0]) ){ psp->alias[psp->nrhs-1] = x; psp->state = RHS_ALIAS_2; }else{ @@ -2136,49 +2482,52 @@ to follow the previous rule."); } break; case WAITING_FOR_DECL_KEYWORD: - if( isalpha(x[0]) ){ + if( ISALPHA(x[0]) ){ psp->declkeyword = x; psp->declargslot = 0; - psp->decllnslot = 0; + psp->decllinenoslot = 0; + psp->insertLineMacro = 1; psp->state = WAITING_FOR_DECL_ARG; if( strcmp(x,"name")==0 ){ psp->declargslot = &(psp->gp->name); - }else if( strcmp(x,"include")==0 ){ + psp->insertLineMacro = 0; + }else if( strcmp(x,"include")==0 ){ psp->declargslot = &(psp->gp->include); - psp->decllnslot = &psp->gp->includeln; - }else if( strcmp(x,"code")==0 ){ + }else if( strcmp(x,"code")==0 ){ psp->declargslot = &(psp->gp->extracode); - psp->decllnslot = &psp->gp->extracodeln; - }else if( strcmp(x,"token_destructor")==0 ){ + }else if( strcmp(x,"token_destructor")==0 ){ psp->declargslot = &psp->gp->tokendest; - psp->decllnslot = &psp->gp->tokendestln; - }else if( strcmp(x,"default_destructor")==0 ){ + }else if( strcmp(x,"default_destructor")==0 ){ psp->declargslot = &psp->gp->vardest; - psp->decllnslot = &psp->gp->vardestln; - }else if( strcmp(x,"token_prefix")==0 ){ + }else if( strcmp(x,"token_prefix")==0 ){ psp->declargslot = &psp->gp->tokenprefix; - }else if( strcmp(x,"syntax_error")==0 ){ + psp->insertLineMacro = 0; + }else if( strcmp(x,"syntax_error")==0 ){ psp->declargslot = &(psp->gp->error); - psp->decllnslot = &psp->gp->errorln; - }else if( strcmp(x,"parse_accept")==0 ){ + }else if( strcmp(x,"parse_accept")==0 ){ psp->declargslot = &(psp->gp->accept); - psp->decllnslot = &psp->gp->acceptln; - }else if( strcmp(x,"parse_failure")==0 ){ + }else if( strcmp(x,"parse_failure")==0 ){ psp->declargslot = &(psp->gp->failure); - psp->decllnslot = &psp->gp->failureln; - }else if( strcmp(x,"stack_overflow")==0 ){ + }else if( strcmp(x,"stack_overflow")==0 ){ psp->declargslot = &(psp->gp->overflow); - psp->decllnslot = &psp->gp->overflowln; }else if( strcmp(x,"extra_argument")==0 ){ psp->declargslot = &(psp->gp->arg); + psp->insertLineMacro = 0; + }else if( strcmp(x,"extra_context")==0 ){ + psp->declargslot = &(psp->gp->ctx); + psp->insertLineMacro = 0; }else if( strcmp(x,"token_type")==0 ){ psp->declargslot = &(psp->gp->tokentype); + psp->insertLineMacro = 0; }else if( strcmp(x,"default_type")==0 ){ psp->declargslot = &(psp->gp->vartype); + psp->insertLineMacro = 0; }else if( strcmp(x,"stack_size")==0 ){ psp->declargslot = &(psp->gp->stacksize); + psp->insertLineMacro = 0; }else if( strcmp(x,"start_symbol")==0 ){ psp->declargslot = &(psp->gp->start); + psp->insertLineMacro = 0; }else if( strcmp(x,"left")==0 ){ psp->preccounter++; psp->declassoc = LEFT; @@ -2191,19 +2540,25 @@ to follow the previous rule."); psp->preccounter++; psp->declassoc = NONE; psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; - }else if( strcmp(x,"destructor")==0 ){ + }else if( strcmp(x,"destructor")==0 ){ psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL; - }else if( strcmp(x,"type")==0 ){ + }else if( strcmp(x,"type")==0 ){ psp->state = WAITING_FOR_DATATYPE_SYMBOL; }else if( strcmp(x,"fallback")==0 ){ psp->fallback = 0; psp->state = WAITING_FOR_FALLBACK_ID; + }else if( strcmp(x,"token")==0 ){ + psp->state = WAITING_FOR_TOKEN_NAME; + }else if( strcmp(x,"wildcard")==0 ){ + psp->state = WAITING_FOR_WILDCARD_ID; + }else if( strcmp(x,"token_class")==0 ){ + psp->state = WAITING_FOR_CLASS_ID; }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Unknown declaration keyword: \"%%%s\".",x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; - } + } }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Illegal declaration keyword: \"%s\".",x); @@ -2212,7 +2567,7 @@ to follow the previous rule."); } break; case WAITING_FOR_DESTRUCTOR_SYMBOL: - if( !isalpha(x[0]) ){ + if( !ISALPHA(x[0]) ){ ErrorMsg(psp->filename,psp->tokenlineno, "Symbol name missing after %%destructor keyword"); psp->errorcnt++; @@ -2220,37 +2575,48 @@ to follow the previous rule."); }else{ struct symbol *sp = Symbol_new(x); psp->declargslot = &sp->destructor; - psp->decllnslot = &sp->destructorln; + psp->decllinenoslot = &sp->destLineno; + psp->insertLineMacro = 1; psp->state = WAITING_FOR_DECL_ARG; } break; case WAITING_FOR_DATATYPE_SYMBOL: - if( !isalpha(x[0]) ){ + if( !ISALPHA(x[0]) ){ ErrorMsg(psp->filename,psp->tokenlineno, - "Symbol name missing after %%destructor keyword"); + "Symbol name missing after %%type keyword"); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; }else{ - struct symbol *sp = Symbol_new(x); - psp->declargslot = &sp->datatype; - psp->decllnslot = 0; - psp->state = WAITING_FOR_DECL_ARG; + struct symbol *sp = Symbol_find(x); + if((sp) && (sp->datatype)){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol %%type \"%s\" already defined", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + if (!sp){ + sp = Symbol_new(x); + } + psp->declargslot = &sp->datatype; + psp->insertLineMacro = 0; + psp->state = WAITING_FOR_DECL_ARG; + } } break; case WAITING_FOR_PRECEDENCE_SYMBOL: if( x[0]=='.' ){ psp->state = WAITING_FOR_DECL_OR_RULE; - }else if( isupper(x[0]) ){ + }else if( ISUPPER(x[0]) ){ struct symbol *sp; sp = Symbol_new(x); if( sp->prec>=0 ){ ErrorMsg(psp->filename,psp->tokenlineno, "Symbol \"%s\" has already be given a precedence.",x); psp->errorcnt++; - }else{ + }else{ sp->prec = psp->preccounter; sp->assoc = psp->declassoc; - } + } }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Can't assign a precedence to \"%s\".",x); @@ -2258,18 +2624,59 @@ to follow the previous rule."); } break; case WAITING_FOR_DECL_ARG: - if( (x[0]=='{' || x[0]=='\"' || isalnum(x[0])) ){ - if( *(psp->declargslot)!=0 ){ - ErrorMsg(psp->filename,psp->tokenlineno, - "The argument \"%s\" to declaration \"%%%s\" is not the first.", - x[0]=='\"' ? &x[1] : x,psp->declkeyword); - psp->errorcnt++; - psp->state = RESYNC_AFTER_DECL_ERROR; - }else{ - *(psp->declargslot) = (x[0]=='\"' || x[0]=='{') ? &x[1] : x; - if( psp->decllnslot ) *psp->decllnslot = psp->tokenlineno; - psp->state = WAITING_FOR_DECL_OR_RULE; - } + if( x[0]=='{' || x[0]=='\"' || ISALNUM(x[0]) ){ + const char *zOld, *zNew; + char *zBuf, *z; + int nOld, n, nLine = 0, nNew, nBack; + int addLineMacro; + char zLine[50]; + zNew = x; + if( zNew[0]=='"' || zNew[0]=='{' ) zNew++; + nNew = lemonStrlen(zNew); + if( *psp->declargslot ){ + zOld = *psp->declargslot; + }else{ + zOld = ""; + } + nOld = lemonStrlen(zOld); + n = nOld + nNew + 20; + addLineMacro = !psp->gp->nolinenosflag + && psp->insertLineMacro + && psp->tokenlineno>1 + && (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0); + if( addLineMacro ){ + for(z=psp->filename, nBack=0; *z; z++){ + if( *z=='\\' ) nBack++; + } + lemon_sprintf(zLine, "#line %d ", psp->tokenlineno); + nLine = lemonStrlen(zLine); + n += nLine + lemonStrlen(psp->filename) + nBack; + } + *psp->declargslot = (char *) realloc(*psp->declargslot, n); + zBuf = *psp->declargslot + nOld; + if( addLineMacro ){ + if( nOld && zBuf[-1]!='\n' ){ + *(zBuf++) = '\n'; + } + memcpy(zBuf, zLine, nLine); + zBuf += nLine; + *(zBuf++) = '"'; + for(z=psp->filename; *z; z++){ + if( *z=='\\' ){ + *(zBuf++) = '\\'; + } + *(zBuf++) = *z; + } + *(zBuf++) = '"'; + *(zBuf++) = '\n'; + } + if( psp->decllinenoslot && psp->decllinenoslot[0]==0 ){ + psp->decllinenoslot[0] = psp->tokenlineno; + } + memcpy(zBuf, zNew, nNew); + zBuf += nNew; + *zBuf = 0; + psp->state = WAITING_FOR_DECL_OR_RULE; }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Illegal argument to %%%s: %s",psp->declkeyword,x); @@ -2280,7 +2687,7 @@ to follow the previous rule."); case WAITING_FOR_FALLBACK_ID: if( x[0]=='.' ){ psp->state = WAITING_FOR_DECL_OR_RULE; - }else if( !isupper(x[0]) ){ + }else if( !ISUPPER(x[0]) ){ ErrorMsg(psp->filename, psp->tokenlineno, "%%fallback argument \"%s\" should be a token", x); psp->errorcnt++; @@ -2298,6 +2705,78 @@ to follow the previous rule."); } } break; + case WAITING_FOR_TOKEN_NAME: + /* Tokens do not have to be declared before use. But they can be + ** in order to control their assigned integer number. The number for + ** each token is assigned when it is first seen. So by including + ** + ** %token ONE TWO THREE. + ** + ** early in the grammar file, that assigns small consecutive values + ** to each of the tokens ONE TWO and THREE. + */ + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( !ISUPPER(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%token argument \"%s\" should be a token", x); + psp->errorcnt++; + }else{ + (void)Symbol_new(x); + } + break; + case WAITING_FOR_WILDCARD_ID: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( !ISUPPER(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%wildcard argument \"%s\" should be a token", x); + psp->errorcnt++; + }else{ + struct symbol *sp = Symbol_new(x); + if( psp->gp->wildcard==0 ){ + psp->gp->wildcard = sp; + }else{ + ErrorMsg(psp->filename, psp->tokenlineno, + "Extra wildcard to token: %s", x); + psp->errorcnt++; + } + } + break; + case WAITING_FOR_CLASS_ID: + if( !ISLOWER(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%token_class must be followed by an identifier: %s", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else if( Symbol_find(x) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "Symbol \"%s\" already used", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + psp->tkclass = Symbol_new(x); + psp->tkclass->type = MULTITERMINAL; + psp->state = WAITING_FOR_CLASS_TOKEN; + } + break; + case WAITING_FOR_CLASS_TOKEN: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( ISUPPER(x[0]) || ((x[0]=='|' || x[0]=='/') && ISUPPER(x[1])) ){ + struct symbol *msp = psp->tkclass; + msp->nsubsym++; + msp->subsym = (struct symbol **) realloc(msp->subsym, + sizeof(struct symbol*)*msp->nsubsym); + if( !ISUPPER(x[0]) ) x++; + msp->subsym[msp->nsubsym-1] = Symbol_new(x); + }else{ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%token_class argument \"%s\" should be a token", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + break; case RESYNC_AFTER_RULE_ERROR: /* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; ** break; */ @@ -2308,24 +2787,181 @@ to follow the previous rule."); } } +/* The text in the input is part of the argument to an %ifdef or %ifndef. +** Evaluate the text as a boolean expression. Return true or false. +*/ +static int eval_preprocessor_boolean(char *z, int lineno){ + int neg = 0; + int res = 0; + int okTerm = 1; + int i; + for(i=0; z[i]!=0; i++){ + if( ISSPACE(z[i]) ) continue; + if( z[i]=='!' ){ + if( !okTerm ) goto pp_syntax_error; + neg = !neg; + continue; + } + if( z[i]=='|' && z[i+1]=='|' ){ + if( okTerm ) goto pp_syntax_error; + if( res ) return 1; + i++; + okTerm = 1; + continue; + } + if( z[i]=='&' && z[i+1]=='&' ){ + if( okTerm ) goto pp_syntax_error; + if( !res ) return 0; + i++; + okTerm = 1; + continue; + } + if( z[i]=='(' ){ + int k; + int n = 1; + if( !okTerm ) goto pp_syntax_error; + for(k=i+1; z[k]; k++){ + if( z[k]==')' ){ + n--; + if( n==0 ){ + z[k] = 0; + res = eval_preprocessor_boolean(&z[i+1], -1); + z[k] = ')'; + if( res<0 ){ + i = i-res; + goto pp_syntax_error; + } + i = k; + break; + } + }else if( z[k]=='(' ){ + n++; + }else if( z[k]==0 ){ + i = k; + goto pp_syntax_error; + } + } + if( neg ){ + res = !res; + neg = 0; + } + okTerm = 0; + continue; + } + if( ISALPHA(z[i]) ){ + int j, k, n; + if( !okTerm ) goto pp_syntax_error; + for(k=i+1; ISALNUM(z[k]) || z[k]=='_'; k++){} + n = k - i; + res = 0; + for(j=0; j<nDefine; j++){ + if( strncmp(azDefine[j],&z[i],n)==0 && azDefine[j][n]==0 ){ + res = 1; + break; + } + } + i = k-1; + if( neg ){ + res = !res; + neg = 0; + } + okTerm = 0; + continue; + } + goto pp_syntax_error; + } + return res; + +pp_syntax_error: + if( lineno>0 ){ + fprintf(stderr, "%%if syntax error on line %d.\n", lineno); + fprintf(stderr, " %.*s <-- syntax error here\n", i+1, z); + exit(1); + }else{ + return -(i+1); + } +} + +/* Run the preprocessor over the input file text. The global variables +** azDefine[0] through azDefine[nDefine-1] contains the names of all defined +** macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and +** comments them out. Text in between is also commented out as appropriate. +*/ +static void preprocess_input(char *z){ + int i, j, k; + int exclude = 0; + int start = 0; + int lineno = 1; + int start_lineno = 1; + for(i=0; z[i]; i++){ + if( z[i]=='\n' ) lineno++; + if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue; + if( strncmp(&z[i],"%endif",6)==0 && ISSPACE(z[i+6]) ){ + if( exclude ){ + exclude--; + if( exclude==0 ){ + for(j=start; j<i; j++) if( z[j]!='\n' ) z[j] = ' '; + } + } + for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' '; + }else if( strncmp(&z[i],"%else",5)==0 && ISSPACE(z[i+5]) ){ + if( exclude==1){ + exclude = 0; + for(j=start; j<i; j++) if( z[j]!='\n' ) z[j] = ' '; + }else if( exclude==0 ){ + exclude = 1; + start = i; + start_lineno = lineno; + } + for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' '; + }else if( strncmp(&z[i],"%ifdef ",7)==0 + || strncmp(&z[i],"%if ",4)==0 + || strncmp(&z[i],"%ifndef ",8)==0 ){ + if( exclude ){ + exclude++; + }else{ + int isNot; + int iBool; + for(j=i; z[j] && !ISSPACE(z[j]); j++){} + iBool = j; + isNot = (j==i+7); + while( z[j] && z[j]!='\n' ){ j++; } + k = z[j]; + z[j] = 0; + exclude = eval_preprocessor_boolean(&z[iBool], lineno); + z[j] = k; + if( !isNot ) exclude = !exclude; + if( exclude ){ + start = i; + start_lineno = lineno; + } + } + for(j=i; z[j] && z[j]!='\n'; j++) z[j] = ' '; + } + } + if( exclude ){ + fprintf(stderr,"unterminated %%ifdef starting on line %d\n", start_lineno); + exit(1); + } +} + /* In spite of its name, this function is really a scanner. It read ** in the entire input file (all at once) then tokenizes it. Each ** token is passed to the function "parseonetoken" which builds all ** the appropriate data structures in the global state vector "gp". */ -struct pstate ps; -void Parse(gp) -struct lemon *gp; +void Parse(struct lemon *gp) { + struct pstate ps; FILE *fp; char *filebuf; - size_t filesize; + unsigned int filesize; int lineno; int c; char *cp, *nextcp; int startline = 0; - long epos; + memset(&ps, '\0', sizeof(ps)); ps.gp = gp; ps.filename = gp->filename; ps.errorcnt = 0; @@ -2339,39 +2975,39 @@ struct lemon *gp; return; } fseek(fp,0,2); - epos = ftell(fp); - if( -1 == epos ) { - ErrorMsg(ps.filename,0,"Can't determine file size."); - fclose(fp); - gp->errorcnt++; - return; - } - filesize = (size_t)epos; + filesize = ftell(fp); rewind(fp); filebuf = (char *)malloc( filesize+1 ); - if( filebuf==0 ){ - ErrorMsg(ps.filename,0,"Can't allocate %zu of memory to hold this file.", - filesize+1); - fclose(fp); + if( filesize>100000000 || filebuf==0 ){ + ErrorMsg(ps.filename,0,"Input file too large."); + free(filebuf); gp->errorcnt++; + fclose(fp); return; } if( fread(filebuf,1,filesize,fp)!=filesize ){ - ErrorMsg(ps.filename,0,"Can't read in all %zu bytes of this file.", + ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.", filesize); free(filebuf); - fclose(fp); gp->errorcnt++; + fclose(fp); return; } fclose(fp); filebuf[filesize] = 0; + /* Make an initial pass through the file to handle %ifdef and %ifndef */ + preprocess_input(filebuf); + if( gp->printPreprocessed ){ + printf("%s\n", filebuf); + return; + } + /* Now scan the text of the input file */ lineno = 1; for(cp=filebuf; (c= *cp)!=0; ){ if( c=='\n' ) lineno++; /* Keep track of the line number */ - if( isspace(c) ){ cp++; continue; } /* Skip all white space */ + if( ISSPACE(c) ){ cp++; continue; } /* Skip all white space */ if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */ cp+=2; while( (c= *cp)!=0 && c!='\n' ) cp++; @@ -2379,6 +3015,7 @@ struct lemon *gp; } if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */ cp+=2; + if( (*cp)=='/' ) cp++; while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){ if( c=='\n' ) lineno++; cp++; @@ -2396,7 +3033,8 @@ struct lemon *gp; } if( c==0 ){ ErrorMsg(ps.filename,startline, -"String starting on this line is not terminated before the end of the file."); + "String starting on this line is not terminated before " + "the end of the file."); ps.errorcnt++; nextcp = cp; }else{ @@ -2417,12 +3055,12 @@ struct lemon *gp; if( c=='\n' ) lineno++; prevc = c; cp++; - } - }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */ + } + }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */ cp = &cp[2]; while( (c= *cp)!=0 && c!='\n' ) cp++; if( c ) lineno++; - }else if( c=='\'' || c=='\"' ){ /* String a character literals */ + }else if( c=='\'' || c=='\"' ){ /* String a character literals */ int startchar, prevc; startchar = c; prevc = 0; @@ -2430,23 +3068,28 @@ struct lemon *gp; if( c=='\n' ) lineno++; if( prevc=='\\' ) prevc = 0; else prevc = c; - } - } + } + } } if( c==0 ){ ErrorMsg(ps.filename,ps.tokenlineno, -"C code starting on this line is not terminated before the end of the file."); + "C code starting on this line is not terminated before " + "the end of the file."); ps.errorcnt++; nextcp = cp; }else{ nextcp = cp+1; } - }else if( isalnum(c) ){ /* Identifiers */ - while( (c= *cp)!=0 && (isalnum(c) || c=='_') ) cp++; + }else if( ISALNUM(c) ){ /* Identifiers */ + while( (c= *cp)!=0 && (ISALNUM(c) || c=='_') ) cp++; nextcp = cp; }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */ cp += 3; nextcp = cp; + }else if( (c=='/' || c=='|') && ISALPHA(cp[1]) ){ + cp += 2; + while( (c = *cp)!=0 && (ISALNUM(c) || c=='_') ) cp++; + nextcp = cp; }else{ /* All other (one character) operators */ cp++; nextcp = cp; @@ -2454,7 +3097,7 @@ struct lemon *gp; c = *cp; *cp = 0; /* Null terminate the token */ parseonetoken(&ps); /* Parse the token */ - *cp = c; /* Restore the buffer */ + *cp = (char)c; /* Restore the buffer */ cp = nextcp; } free(filebuf); /* Release the buffer after parsing */ @@ -2469,13 +3112,13 @@ struct lemon *gp; static struct plink *plink_freelist = 0; /* Allocate a new plink */ -struct plink *Plink_new(){ - struct plink *new; +struct plink *Plink_new(void){ + struct plink *newlink; if( plink_freelist==0 ){ int i; int amt = 100; - plink_freelist = (struct plink *)malloc( sizeof(struct plink)*amt ); + plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) ); if( plink_freelist==0 ){ fprintf(stderr, "Unable to allocate memory for a new follow-set propagation link.\n"); @@ -2484,27 +3127,23 @@ struct plink *Plink_new(){ for(i=0; i<amt-1; i++) plink_freelist[i].next = &plink_freelist[i+1]; plink_freelist[amt-1].next = 0; } - new = plink_freelist; + newlink = plink_freelist; plink_freelist = plink_freelist->next; - return new; + return newlink; } /* Add a plink to a plink list */ -void Plink_add(plpp,cfp) -struct plink **plpp; -struct config *cfp; +void Plink_add(struct plink **plpp, struct config *cfp) { - struct plink *new; - new = Plink_new(); - new->next = *plpp; - *plpp = new; - new->cfp = cfp; + struct plink *newlink; + newlink = Plink_new(); + newlink->next = *plpp; + *plpp = newlink; + newlink->cfp = cfp; } /* Transfer every plink on the list "from" to the list "to" */ -void Plink_copy(to,from) -struct plink **to; -struct plink *from; +void Plink_copy(struct plink **to, struct plink *from) { struct plink *nextpl; while( from ){ @@ -2516,8 +3155,7 @@ struct plink *from; } /* Delete every plink on the list */ -void Plink_delete(plp) -struct plink *plp; +void Plink_delete(struct plink *plp) { struct plink *nextpl; @@ -2537,41 +3175,46 @@ struct plink *plp; ** name comes from malloc() and must be freed by the calling ** function. */ -PRIVATE char *file_makename(lemp,suffix) -struct lemon *lemp; -char *suffix; +PRIVATE char *file_makename(struct lemon *lemp, const char *suffix) { char *name; char *cp; - - name = malloc( strlen(out_dir) + strlen(lemp->filename) + strlen(suffix) + 6 ); + char *filename = lemp->filename; + int sz; + + if( outputDir ){ + cp = strrchr(filename, '/'); + if( cp ) filename = cp + 1; + } + sz = lemonStrlen(filename); + sz += lemonStrlen(suffix); + if( outputDir ) sz += lemonStrlen(outputDir) + 1; + sz += 5; + name = (char*)malloc( sz ); if( name==0 ){ fprintf(stderr,"Can't allocate space for a filename.\n"); exit(1); } - /* skip directory, JK */ - if (NULL == (cp = strrchr(lemp->filename, '/'))) { - cp = lemp->filename; - } else { - cp++; - } - strcpy(name,out_dir); - strcat(name,"/"); - strcat(name,cp); + name[0] = 0; + if( outputDir ){ + lemon_strcpy(name, outputDir); + lemon_strcat(name, "/"); + } + lemon_strcat(name,filename); cp = strrchr(name,'.'); if( cp ) *cp = 0; - strcat(name,suffix); + lemon_strcat(name,suffix); return name; } /* Open a file with a name based on the name of the input file, ** but with a different (specified) suffix, and return a pointer ** to the stream */ -PRIVATE FILE *file_open(lemp,suffix,mode) -struct lemon *lemp; -char *suffix; -char *mode; -{ +PRIVATE FILE *file_open( + struct lemon *lemp, + const char *suffix, + const char *mode +){ FILE *fp; if( lemp->outname ) free(lemp->outname); @@ -2585,10 +3228,30 @@ char *mode; return fp; } +/* Print the text of a rule +*/ +void rule_print(FILE *out, struct rule *rp){ + int i, j; + fprintf(out, "%s",rp->lhs->name); + /* if( rp->lhsalias ) fprintf(out,"(%s)",rp->lhsalias); */ + fprintf(out," ::="); + for(i=0; i<rp->nrhs; i++){ + struct symbol *sp = rp->rhs[i]; + if( sp->type==MULTITERMINAL ){ + fprintf(out," %s", sp->subsym[0]->name); + for(j=1; j<sp->nsubsym; j++){ + fprintf(out,"|%s", sp->subsym[j]->name); + } + }else{ + fprintf(out," %s", sp->name); + } + /* if( rp->rhsalias[i] ) fprintf(out,"(%s)",rp->rhsalias[i]); */ + } +} + /* Duplicate the input file without comments and without actions ** on rules */ -void Reprint(lemp) -struct lemon *lemp; +void Reprint(struct lemon *lemp) { struct rule *rp; struct symbol *sp; @@ -2597,7 +3260,7 @@ struct lemon *lemp; maxlen = 10; for(i=0; i<lemp->nsymbol; i++){ sp = lemp->symbols[i]; - len = strlen(sp->name); + len = lemonStrlen(sp->name); if( len>maxlen ) maxlen = len; } ncolumns = 76/(maxlen+5); @@ -2613,37 +3276,43 @@ struct lemon *lemp; printf("\n"); } for(rp=lemp->rule; rp; rp=rp->next){ - printf("%s",rp->lhs->name); -/* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */ - printf(" ::="); - for(i=0; i<rp->nrhs; i++){ - printf(" %s",rp->rhs[i]->name); -/* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */ - } + rule_print(stdout, rp); printf("."); if( rp->precsym ) printf(" [%s]",rp->precsym->name); -/* if( rp->code ) printf("\n %s",rp->code); */ + /* if( rp->code ) printf("\n %s",rp->code); */ printf("\n"); } } -PRIVATE void ConfigPrint(fp,cfp) -FILE *fp; -struct config *cfp; -{ - struct rule *rp; - int i; - rp = cfp->rp; +/* Print a single rule. +*/ +void RulePrint(FILE *fp, struct rule *rp, int iCursor){ + struct symbol *sp; + int i, j; fprintf(fp,"%s ::=",rp->lhs->name); for(i=0; i<=rp->nrhs; i++){ - if( i==cfp->dot ) fprintf(fp," *"); + if( i==iCursor ) fprintf(fp," *"); if( i==rp->nrhs ) break; - fprintf(fp," %s",rp->rhs[i]->name); + sp = rp->rhs[i]; + if( sp->type==MULTITERMINAL ){ + fprintf(fp," %s", sp->subsym[0]->name); + for(j=1; j<sp->nsubsym; j++){ + fprintf(fp,"|%s",sp->subsym[j]->name); + } + }else{ + fprintf(fp," %s", sp->name); + } } } +/* Print the rule for a configuration. +*/ +void ConfigPrint(FILE *fp, struct config *cfp){ + RulePrint(fp, cfp->rp, cfp->dot); +} + /* #define TEST */ -#ifdef TEST +#if 0 /* Print a set */ PRIVATE void SetPrint(out,set,lemp) FILE *out; @@ -2664,13 +3333,13 @@ struct lemon *lemp; } /* Print a plink chain */ -void PlinkPrint(out,plp,tag) +PRIVATE void PlinkPrint(out,plp,tag) FILE *out; struct plink *plp; char *tag; { while( plp ){ - fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->index); + fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum); ConfigPrint(out,plp->cfp); fprintf(out,"\n"); plp = plp->next; @@ -2681,63 +3350,99 @@ char *tag; /* Print an action to the given file descriptor. Return FALSE if ** nothing was actually printed. */ -PRIVATE int PrintAction(struct action *ap, FILE *fp, int indent){ +int PrintAction( + struct action *ap, /* The action to print */ + FILE *fp, /* Print the action here */ + int indent /* Indent by this amount */ +){ int result = 1; switch( ap->type ){ - case SHIFT: - fprintf(fp,"%*s shift %d",indent,ap->sp->name,ap->x.stp->index); + case SHIFT: { + struct state *stp = ap->x.stp; + fprintf(fp,"%*s shift %-7d",indent,ap->sp->name,stp->statenum); break; - case REDUCE: - fprintf(fp,"%*s reduce %d",indent,ap->sp->name,ap->x.rp->index); + } + case REDUCE: { + struct rule *rp = ap->x.rp; + fprintf(fp,"%*s reduce %-7d",indent,ap->sp->name,rp->iRule); + RulePrint(fp, rp, -1); break; + } + case SHIFTREDUCE: { + struct rule *rp = ap->x.rp; + fprintf(fp,"%*s shift-reduce %-7d",indent,ap->sp->name,rp->iRule); + RulePrint(fp, rp, -1); + break; + } case ACCEPT: fprintf(fp,"%*s accept",indent,ap->sp->name); break; case ERROR: fprintf(fp,"%*s error",indent,ap->sp->name); break; - case CONFLICT: - fprintf(fp,"%*s reduce %-3d ** Parsing conflict **", - indent,ap->sp->name,ap->x.rp->index); + case SRCONFLICT: + case RRCONFLICT: + fprintf(fp,"%*s reduce %-7d ** Parsing conflict **", + indent,ap->sp->name,ap->x.rp->iRule); + break; + case SSCONFLICT: + fprintf(fp,"%*s shift %-7d ** Parsing conflict **", + indent,ap->sp->name,ap->x.stp->statenum); break; case SH_RESOLVED: + if( showPrecedenceConflict ){ + fprintf(fp,"%*s shift %-7d -- dropped by precedence", + indent,ap->sp->name,ap->x.stp->statenum); + }else{ + result = 0; + } + break; case RD_RESOLVED: + if( showPrecedenceConflict ){ + fprintf(fp,"%*s reduce %-7d -- dropped by precedence", + indent,ap->sp->name,ap->x.rp->iRule); + }else{ + result = 0; + } + break; case NOT_USED: result = 0; break; } + if( result && ap->spOpt ){ + fprintf(fp," /* because %s==%s */", ap->sp->name, ap->spOpt->name); + } return result; } -/* Generate the "y.output" log file */ -void ReportOutput(lemp) -struct lemon *lemp; +/* Generate the "*.out" log file */ +void ReportOutput(struct lemon *lemp) { - int i; + int i, n; struct state *stp; struct config *cfp; struct action *ap; + struct rule *rp; FILE *fp; - fp = file_open(lemp,".out","w"); + fp = file_open(lemp,".out","wb"); if( fp==0 ) return; - fprintf(fp," \b"); - for(i=0; i<lemp->nstate; i++){ + for(i=0; i<lemp->nxstate; i++){ stp = lemp->sorted[i]; - fprintf(fp,"State %d:\n",stp->index); + fprintf(fp,"State %d:\n",stp->statenum); if( lemp->basisflag ) cfp=stp->bp; else cfp=stp->cfp; while( cfp ){ char buf[20]; if( cfp->dot==cfp->rp->nrhs ){ - sprintf(buf,"(%d)",cfp->rp->index); + lemon_sprintf(buf,"(%d)",cfp->rp->iRule); fprintf(fp," %5s ",buf); }else{ fprintf(fp," "); } ConfigPrint(fp,cfp); fprintf(fp,"\n"); -#ifdef TEST +#if 0 SetPrint(fp,cfp->fws,lemp); PlinkPrint(fp,cfp->fplp,"To "); PlinkPrint(fp,cfp->bplp,"From"); @@ -2751,18 +3456,72 @@ struct lemon *lemp; } fprintf(fp,"\n"); } + fprintf(fp, "----------------------------------------------------\n"); + fprintf(fp, "Symbols:\n"); + fprintf(fp, "The first-set of non-terminals is shown after the name.\n\n"); + for(i=0; i<lemp->nsymbol; i++){ + int j; + struct symbol *sp; + + sp = lemp->symbols[i]; + fprintf(fp, " %3d: %s", i, sp->name); + if( sp->type==NONTERMINAL ){ + fprintf(fp, ":"); + if( sp->lambda ){ + fprintf(fp, " <lambda>"); + } + for(j=0; j<lemp->nterminal; j++){ + if( sp->firstset && SetFind(sp->firstset, j) ){ + fprintf(fp, " %s", lemp->symbols[j]->name); + } + } + } + if( sp->prec>=0 ) fprintf(fp," (precedence=%d)", sp->prec); + fprintf(fp, "\n"); + } + fprintf(fp, "----------------------------------------------------\n"); + fprintf(fp, "Syntax-only Symbols:\n"); + fprintf(fp, "The following symbols never carry semantic content.\n\n"); + for(i=n=0; i<lemp->nsymbol; i++){ + int w; + struct symbol *sp = lemp->symbols[i]; + if( sp->bContent ) continue; + w = (int)strlen(sp->name); + if( n>0 && n+w>75 ){ + fprintf(fp,"\n"); + n = 0; + } + if( n>0 ){ + fprintf(fp, " "); + n++; + } + fprintf(fp, "%s", sp->name); + n += w; + } + if( n>0 ) fprintf(fp, "\n"); + fprintf(fp, "----------------------------------------------------\n"); + fprintf(fp, "Rules:\n"); + for(rp=lemp->rule; rp; rp=rp->next){ + fprintf(fp, "%4d: ", rp->iRule); + rule_print(fp, rp); + fprintf(fp,"."); + if( rp->precsym ){ + fprintf(fp," [%s precedence=%d]", + rp->precsym->name, rp->precsym->prec); + } + fprintf(fp,"\n"); + } fclose(fp); return; } /* Search for the file "name" which is in the same directory as ** the executable */ -PRIVATE char *pathsearch(argv0,name,modemask) -char *argv0; -char *name; -int modemask; +PRIVATE char *pathsearch(char *argv0, char *name, int modemask) { - char *pathlist; + const char *pathlist; + char *pathbufptr = 0; + char *pathbuf = 0; char *path,*cp; char c; @@ -2774,26 +3533,30 @@ int modemask; if( cp ){ c = *cp; *cp = 0; - path = (char *)malloc( strlen(argv0) + strlen(name) + 2 ); - if( path ) sprintf(path,"%s/%s",argv0,name); + path = (char *)malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2 ); + if( path ) lemon_sprintf(path,"%s/%s",argv0,name); *cp = c; }else{ pathlist = getenv("PATH"); if( pathlist==0 ) pathlist = ".:/bin:/usr/bin"; - path = (char *)malloc( strlen(pathlist)+strlen(name)+2 ); - if( path!=0 ){ - while( *pathlist ){ - cp = strchr(pathlist,':'); - if( cp==0 ) cp = &pathlist[strlen(pathlist)]; + pathbuf = (char *) malloc( lemonStrlen(pathlist) + 1 ); + path = (char *)malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2 ); + if( (pathbuf != 0) && (path!=0) ){ + pathbufptr = pathbuf; + lemon_strcpy(pathbuf, pathlist); + while( *pathbuf ){ + cp = strchr(pathbuf,':'); + if( cp==0 ) cp = &pathbuf[lemonStrlen(pathbuf)]; c = *cp; *cp = 0; - sprintf(path,"%s/%s",pathlist,name); + lemon_sprintf(path,"%s/%s",pathbuf,name); *cp = c; - if( c==0 ) pathlist = ""; - else pathlist = &cp[1]; + if( c==0 ) pathbuf[0] = 0; + else pathbuf = &cp[1]; if( access(path,modemask)==0 ) break; } } + free(pathbufptr); } return path; } @@ -2802,16 +3565,27 @@ int modemask; ** which is to be put in the action table of the generated machine. ** Return negative if no action should be generated. */ -PRIVATE int compute_action(lemp,ap) -struct lemon *lemp; -struct action *ap; +PRIVATE int compute_action(struct lemon *lemp, struct action *ap) { int act; switch( ap->type ){ - case SHIFT: act = ap->x.stp->index; break; - case REDUCE: act = ap->x.rp->index + lemp->nstate; break; - case ERROR: act = lemp->nstate + lemp->nrule; break; - case ACCEPT: act = lemp->nstate + lemp->nrule + 1; break; + case SHIFT: act = ap->x.stp->statenum; break; + case SHIFTREDUCE: { + /* Since a SHIFT is inherient after a prior REDUCE, convert any + ** SHIFTREDUCE action with a nonterminal on the LHS into a simple + ** REDUCE action: */ + if( ap->sp->index>=lemp->nterminal + && (lemp->errsym==0 || ap->sp->index!=lemp->errsym->index) + ){ + act = lemp->minReduce + ap->x.rp->iRule; + }else{ + act = lemp->minShiftReduce + ap->x.rp->iRule; + } + break; + } + case REDUCE: act = lemp->minReduce + ap->x.rp->iRule; break; + case ERROR: act = lemp->errAction; break; + case ACCEPT: act = lemp->accAction; break; default: act = -1; break; } return act; @@ -2827,11 +3601,7 @@ struct action *ap; ** if name!=0, then any word that begin with "Parse" is changed to ** begin with *name instead. */ -PRIVATE void tplt_xfer(name,in,out,lineno) -char *name; -FILE *in; -FILE *out; -int *lineno; +PRIVATE void tplt_xfer(char *name, FILE *in, FILE *out, int *lineno) { int i, iStart; char line[LINESIZE]; @@ -2841,7 +3611,7 @@ int *lineno; if( name ){ for(i=0; line[i]; i++){ if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0 - && (i==0 || !isalpha(line[i-1])) + && (i==0 || !ISALPHA(line[i-1])) ){ if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]); fprintf(out,"%s",name); @@ -2854,62 +3624,101 @@ int *lineno; } } +/* Skip forward past the header of the template file to the first "%%" +*/ +PRIVATE void tplt_skip_header(FILE *in, int *lineno) +{ + char line[LINESIZE]; + while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){ + (*lineno)++; + } +} + /* The next function finds the template file and opens it, returning ** a pointer to the opened file. */ -PRIVATE FILE *tplt_open(lemp) -struct lemon *lemp; +PRIVATE FILE *tplt_open(struct lemon *lemp) { - + static char templatename[] = "lempar.c"; char buf[1000]; FILE *in; char *tpltname; - char *tpltname_alloc = NULL; + char *toFree = 0; char *cp; + /* first, see if user specified a template filename on the command line. */ + if (user_templatename != 0) { + if( access(user_templatename,004)==-1 ){ + fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", + user_templatename); + lemp->errorcnt++; + return 0; + } + in = fopen(user_templatename,"rb"); + if( in==0 ){ + fprintf(stderr,"Can't open the template file \"%s\".\n", + user_templatename); + lemp->errorcnt++; + return 0; + } + return in; + } + cp = strrchr(lemp->filename,'.'); if( cp ){ - sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename); + lemon_sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename); }else{ - sprintf(buf,"%s.lt",lemp->filename); + lemon_sprintf(buf,"%s.lt",lemp->filename); } if( access(buf,004)==0 ){ tpltname = buf; - }else if( access(lemp->tmplname,004)==0 ){ - tpltname = lemp->tmplname; + }else if( access(templatename,004)==0 ){ + tpltname = templatename; }else{ - tpltname = tpltname_alloc = pathsearch(lemp->argv0,lemp->tmplname,0); + toFree = tpltname = pathsearch(lemp->argv0,templatename,0); } if( tpltname==0 ){ fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", - lemp->tmplname); + templatename); lemp->errorcnt++; return 0; } - in = fopen(tpltname,"r"); + in = fopen(tpltname,"rb"); if( in==0 ){ fprintf(stderr,"Can't open the template file \"%s\".\n",tpltname); lemp->errorcnt++; } - if (tpltname_alloc) free(tpltname_alloc); + free(toFree); return in; } +/* Print a #line directive line to the output file. */ +PRIVATE void tplt_linedir(FILE *out, int lineno, char *filename) +{ + fprintf(out,"#line %d \"",lineno); + while( *filename ){ + if( *filename == '\\' ) putc('\\',out); + putc(*filename,out); + filename++; + } + fprintf(out,"\"\n"); +} + /* Print a string to the file and keep the linenumber up to date */ -PRIVATE void tplt_print(out,lemp,str,strln,lineno) -FILE *out; -struct lemon *lemp; -char *str; -int strln; -int *lineno; +PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str, int *lineno) { if( str==0 ) return; - fprintf(out,"#line %d \"%s\"\n",strln,lemp->filename); (*lineno)++; while( *str ){ - if( *str=='\n' ) (*lineno)++; putc(*str,out); + if( *str=='\n' ) (*lineno)++; str++; } - fprintf(out,"\n#line %d \"%s\"\n",*lineno+2,lemp->outname); (*lineno)+=2; + if( str[-1]!='\n' ){ + putc('\n',out); + (*lineno)++; + } + if (!lemp->nolinenosflag) { + (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); + } return; } @@ -2917,26 +3726,31 @@ int *lineno; ** The following routine emits code for the destructor for the ** symbol sp */ -PRIVATE void emit_destructor_code(out,sp,lemp,lineno) -FILE *out; -struct symbol *sp; -struct lemon *lemp; -int *lineno; -{ +void emit_destructor_code( + FILE *out, + struct symbol *sp, + struct lemon *lemp, + int *lineno +){ char *cp = 0; - int linecnt = 0; if( sp->type==TERMINAL ){ cp = lemp->tokendest; if( cp==0 ) return; - fprintf(out,"#line %d \"%s\"\n{",lemp->tokendestln,lemp->filename); + fprintf(out,"{\n"); (*lineno)++; }else if( sp->destructor ){ cp = sp->destructor; - fprintf(out,"#line %d \"%s\"\n{",sp->destructorln,lemp->filename); - }else{ + fprintf(out,"{\n"); (*lineno)++; + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,sp->destLineno,lemp->filename); + } + }else if( lemp->vardest ){ cp = lemp->vardest; if( cp==0 ) return; - fprintf(out,"#line %d \"%s\"\n{",lemp->vardestln,lemp->filename); + fprintf(out,"{\n"); (*lineno)++; + }else{ + assert( 0 ); /* Cannot happen */ } for(; *cp; cp++){ if( *cp=='$' && cp[1]=='$' ){ @@ -2944,20 +3758,21 @@ int *lineno; cp++; continue; } - if( *cp=='\n' ) linecnt++; + if( *cp=='\n' ) (*lineno)++; fputc(*cp,out); } - (*lineno) += 3 + linecnt; - fprintf(out,"}\n#line %d \"%s\"\n",*lineno,lemp->outname); + fprintf(out,"\n"); (*lineno)++; + if (!lemp->nolinenosflag) { + (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); + } + fprintf(out,"}\n"); (*lineno)++; return; } /* ** Return TRUE (non-zero) if the given symbol has a destructor. */ -PRIVATE int has_destructor(sp, lemp) -struct symbol *sp; -struct lemon *lemp; +int has_destructor(struct symbol *sp, struct lemon *lemp) { int ret; if( sp->type==TERMINAL ){ @@ -2969,83 +3784,301 @@ struct lemon *lemp; } /* +** Append text to a dynamically allocated string. If zText is 0 then +** reset the string to be empty again. Always return the complete text +** of the string (which is overwritten with each call). +** +** n bytes of zText are stored. If n==0 then all of zText up to the first +** \000 terminator is stored. zText can contain up to two instances of +** %d. The values of p1 and p2 are written into the first and second +** %d. +** +** If n==-1, then the previous character is overwritten. +*/ +PRIVATE char *append_str(const char *zText, int n, int p1, int p2){ + static char empty[1] = { 0 }; + static char *z = 0; + static int alloced = 0; + static int used = 0; + int c; + char zInt[40]; + if( zText==0 ){ + if( used==0 && z!=0 ) z[0] = 0; + used = 0; + return z; + } + if( n<=0 ){ + if( n<0 ){ + used += n; + assert( used>=0 ); + } + n = lemonStrlen(zText); + } + if( (int) (n+sizeof(zInt)*2+used) >= alloced ){ + alloced = n + sizeof(zInt)*2 + used + 200; + z = (char *) realloc(z, alloced); + } + if( z==0 ) return empty; + while( n-- > 0 ){ + c = *(zText++); + if( c=='%' && n>0 && zText[0]=='d' ){ + lemon_sprintf(zInt, "%d", p1); + p1 = p2; + lemon_strcpy(&z[used], zInt); + used += lemonStrlen(&z[used]); + zText++; + n--; + }else{ + z[used++] = (char)c; + } + } + z[used] = 0; + return z; +} + +/* +** Write and transform the rp->code string so that symbols are expanded. +** Populate the rp->codePrefix and rp->codeSuffix strings, as appropriate. +** +** Return 1 if the expanded code requires that "yylhsminor" local variable +** to be defined. +*/ +PRIVATE int translate_code(struct lemon *lemp, struct rule *rp){ + char *cp, *xp; + int i; + int rc = 0; /* True if yylhsminor is used */ + int dontUseRhs0 = 0; /* If true, use of left-most RHS label is illegal */ + const char *zSkip = 0; /* The zOvwrt comment within rp->code, or NULL */ + char lhsused = 0; /* True if the LHS element has been used */ + char lhsdirect; /* True if LHS writes directly into stack */ + char used[MAXRHS]; /* True for each RHS element which is used */ + char zLhs[50]; /* Convert the LHS symbol into this string */ + char zOvwrt[900]; /* Comment that to allow LHS to overwrite RHS */ + + for(i=0; i<rp->nrhs; i++) used[i] = 0; + lhsused = 0; + + if( rp->code==0 ){ + static char newlinestr[2] = { '\n', '\0' }; + rp->code = newlinestr; + rp->line = rp->ruleline; + rp->noCode = 1; + }else{ + rp->noCode = 0; + } + + + if( rp->nrhs==0 ){ + /* If there are no RHS symbols, then writing directly to the LHS is ok */ + lhsdirect = 1; + }else if( rp->rhsalias[0]==0 ){ + /* The left-most RHS symbol has no value. LHS direct is ok. But + ** we have to call the destructor on the RHS symbol first. */ + lhsdirect = 1; + if( has_destructor(rp->rhs[0],lemp) ){ + append_str(0,0,0,0); + append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0, + rp->rhs[0]->index,1-rp->nrhs); + rp->codePrefix = Strsafe(append_str(0,0,0,0)); + rp->noCode = 0; + } + }else if( rp->lhsalias==0 ){ + /* There is no LHS value symbol. */ + lhsdirect = 1; + }else if( strcmp(rp->lhsalias,rp->rhsalias[0])==0 ){ + /* The LHS symbol and the left-most RHS symbol are the same, so + ** direct writing is allowed */ + lhsdirect = 1; + lhsused = 1; + used[0] = 1; + if( rp->lhs->dtnum!=rp->rhs[0]->dtnum ){ + ErrorMsg(lemp->filename,rp->ruleline, + "%s(%s) and %s(%s) share the same label but have " + "different datatypes.", + rp->lhs->name, rp->lhsalias, rp->rhs[0]->name, rp->rhsalias[0]); + lemp->errorcnt++; + } + }else{ + lemon_sprintf(zOvwrt, "/*%s-overwrites-%s*/", + rp->lhsalias, rp->rhsalias[0]); + zSkip = strstr(rp->code, zOvwrt); + if( zSkip!=0 ){ + /* The code contains a special comment that indicates that it is safe + ** for the LHS label to overwrite left-most RHS label. */ + lhsdirect = 1; + }else{ + lhsdirect = 0; + } + } + if( lhsdirect ){ + sprintf(zLhs, "yymsp[%d].minor.yy%d",1-rp->nrhs,rp->lhs->dtnum); + }else{ + rc = 1; + sprintf(zLhs, "yylhsminor.yy%d",rp->lhs->dtnum); + } + + append_str(0,0,0,0); + + /* This const cast is wrong but harmless, if we're careful. */ + for(cp=(char *)rp->code; *cp; cp++){ + if( cp==zSkip ){ + append_str(zOvwrt,0,0,0); + cp += lemonStrlen(zOvwrt)-1; + dontUseRhs0 = 1; + continue; + } + if( ISALPHA(*cp) && (cp==rp->code || (!ISALNUM(cp[-1]) && cp[-1]!='_')) ){ + char saved; + for(xp= &cp[1]; ISALNUM(*xp) || *xp=='_'; xp++); + saved = *xp; + *xp = 0; + if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){ + append_str(zLhs,0,0,0); + cp = xp; + lhsused = 1; + }else{ + for(i=0; i<rp->nrhs; i++){ + if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){ + if( i==0 && dontUseRhs0 ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label %s used after '%s'.", + rp->rhsalias[0], zOvwrt); + lemp->errorcnt++; + }else if( cp!=rp->code && cp[-1]=='@' ){ + /* If the argument is of the form @X then substituted + ** the token number of X, not the value of X */ + append_str("yymsp[%d].major",-1,i-rp->nrhs+1,0); + }else{ + struct symbol *sp = rp->rhs[i]; + int dtnum; + if( sp->type==MULTITERMINAL ){ + dtnum = sp->subsym[0]->dtnum; + }else{ + dtnum = sp->dtnum; + } + append_str("yymsp[%d].minor.yy%d",0,i-rp->nrhs+1, dtnum); + } + cp = xp; + used[i] = 1; + break; + } + } + } + *xp = saved; + } + append_str(cp, 1, 0, 0); + } /* End loop */ + + /* Main code generation completed */ + cp = append_str(0,0,0,0); + if( cp && cp[0] ) rp->code = Strsafe(cp); + append_str(0,0,0,0); + + /* Check to make sure the LHS has been used */ + if( rp->lhsalias && !lhsused ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label \"%s\" for \"%s(%s)\" is never used.", + rp->lhsalias,rp->lhs->name,rp->lhsalias); + lemp->errorcnt++; + } + + /* Generate destructor code for RHS minor values which are not referenced. + ** Generate error messages for unused labels and duplicate labels. + */ + for(i=0; i<rp->nrhs; i++){ + if( rp->rhsalias[i] ){ + if( i>0 ){ + int j; + if( rp->lhsalias && strcmp(rp->lhsalias,rp->rhsalias[i])==0 ){ + ErrorMsg(lemp->filename,rp->ruleline, + "%s(%s) has the same label as the LHS but is not the left-most " + "symbol on the RHS.", + rp->rhs[i]->name, rp->rhsalias[i]); + lemp->errorcnt++; + } + for(j=0; j<i; j++){ + if( rp->rhsalias[j] && strcmp(rp->rhsalias[j],rp->rhsalias[i])==0 ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label %s used for multiple symbols on the RHS of a rule.", + rp->rhsalias[i]); + lemp->errorcnt++; + break; + } + } + } + if( !used[i] ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label %s for \"%s(%s)\" is never used.", + rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]); + lemp->errorcnt++; + } + }else if( i>0 && has_destructor(rp->rhs[i],lemp) ){ + append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0, + rp->rhs[i]->index,i-rp->nrhs+1); + } + } + + /* If unable to write LHS values directly into the stack, write the + ** saved LHS value now. */ + if( lhsdirect==0 ){ + append_str(" yymsp[%d].minor.yy%d = ", 0, 1-rp->nrhs, rp->lhs->dtnum); + append_str(zLhs, 0, 0, 0); + append_str(";\n", 0, 0, 0); + } + + /* Suffix code generation complete */ + cp = append_str(0,0,0,0); + if( cp && cp[0] ){ + rp->codeSuffix = Strsafe(cp); + rp->noCode = 0; + } + + return rc; +} + +/* ** Generate code which executes when the rule "rp" is reduced. Write ** the code to "out". Make sure lineno stays up-to-date. */ -PRIVATE void emit_code(out,rp,lemp,lineno) -FILE *out; -struct rule *rp; -struct lemon *lemp; -int *lineno; -{ - char *cp, *xp; - int linecnt = 0; - int i; - char lhsused = 0; /* True if the LHS element has been used */ - char used[MAXRHS]; /* True for each RHS element which is used */ - - for(i=0; i<rp->nrhs; i++) used[i] = 0; - lhsused = 0; +PRIVATE void emit_code( + FILE *out, + struct rule *rp, + struct lemon *lemp, + int *lineno +){ + const char *cp; + + /* Setup code prior to the #line directive */ + if( rp->codePrefix && rp->codePrefix[0] ){ + fprintf(out, "{%s", rp->codePrefix); + for(cp=rp->codePrefix; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } + } /* Generate code to do the reduce action */ if( rp->code ){ - fprintf(out,"#line %d \"%s\"\n{",rp->line,lemp->filename); - for(cp=rp->code; *cp; cp++){ - if( isalpha(*cp) && (cp==rp->code || (!isalnum(cp[-1]) && cp[-1]!='_')) ){ - char saved; - for(xp= &cp[1]; isalnum(*xp) || *xp=='_'; xp++); - saved = *xp; - *xp = 0; - if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){ - fprintf(out,"yygotominor.yy%d",rp->lhs->dtnum); - cp = xp; - lhsused = 1; - }else{ - for(i=0; i<rp->nrhs; i++){ - if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){ - fprintf(out,"yymsp[%d].minor.yy%d",i-rp->nrhs+1,rp->rhs[i]->dtnum); - cp = xp; - used[i] = 1; - break; - } - } - } - *xp = saved; - } - if( *cp=='\n' ) linecnt++; - fputc(*cp,out); - } /* End loop */ - (*lineno) += 3 + linecnt; - fprintf(out,"}\n#line %d \"%s\"\n",*lineno,lemp->outname); - } /* End if( rp->code ) */ - - /* Check to make sure the LHS has been used */ - if( rp->lhsalias && !lhsused ){ - ErrorMsg(lemp->filename,rp->ruleline, - "Label \"%s\" for \"%s(%s)\" is never used.", - rp->lhsalias,rp->lhs->name,rp->lhsalias); - lemp->errorcnt++; + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,rp->line,lemp->filename); + } + fprintf(out,"{%s",rp->code); + for(cp=rp->code; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } + fprintf(out,"}\n"); (*lineno)++; + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,*lineno,lemp->outname); + } } - /* Generate destructor code for RHS symbols which are not used in the - ** reduce code */ - for(i=0; i<rp->nrhs; i++){ - if( rp->rhsalias[i] && !used[i] ){ - ErrorMsg(lemp->filename,rp->ruleline, - "Label %s for \"%s(%s)\" is never used.", - rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]); - lemp->errorcnt++; - }else if( rp->rhsalias[i]==0 ){ - if( has_destructor(rp->rhs[i],lemp) ){ - fprintf(out," yy_destructor(%d,&yymsp[%d].minor);\n", - rp->rhs[i]->index,i-rp->nrhs+1); (*lineno)++; - }else{ - fprintf(out," /* No destructor defined for %s */\n", - rp->rhs[i]->name); - (*lineno)++; - } - } + /* Generate breakdown code that occurs after the #line directive */ + if( rp->codeSuffix && rp->codeSuffix[0] ){ + fprintf(out, "%s", rp->codeSuffix); + for(cp=rp->codeSuffix; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } } + + if( rp->codePrefix ){ + fprintf(out, "}\n"); (*lineno)++; + } + return; } @@ -3056,38 +4089,42 @@ int *lineno; ** union, also set the ".dtnum" field of every terminal and nonterminal ** symbol. */ -PRIVATE void print_stack_union(out,lemp,plineno,mhflag) -FILE *out; /* The output stream */ -struct lemon *lemp; /* The main info structure for this parser */ -int *plineno; /* Pointer to the line number */ -int mhflag; /* True if generating makeheaders output */ -{ +void print_stack_union( + FILE *out, /* The output stream */ + struct lemon *lemp, /* The main info structure for this parser */ + int *plineno, /* Pointer to the line number */ + int mhflag /* True if generating makeheaders output */ +){ int lineno; /* The line number of the output */ char **types; /* A hash table of datatypes */ int arraysize; /* Size of the "types" array */ int maxdtlength; /* Maximum length of any ".datatype" field. */ char *stddt; /* Standardized name for a datatype */ int i,j; /* Loop counters */ - int hash; /* For hashing the name of a type */ - char *name; /* Name of the parser */ + unsigned hash; /* For hashing the name of a type */ + const char *name; /* Name of the parser */ /* Allocate and initialize types[] and allocate stddt[] */ arraysize = lemp->nsymbol * 2; - types = (char**)malloc( arraysize * sizeof(char*) ); + types = (char**)calloc( arraysize, sizeof(char*) ); + if( types==0 ){ + fprintf(stderr,"Out of memory.\n"); + exit(1); + } for(i=0; i<arraysize; i++) types[i] = 0; maxdtlength = 0; if( lemp->vartype ){ - maxdtlength = strlen(lemp->vartype); + maxdtlength = lemonStrlen(lemp->vartype); } for(i=0; i<lemp->nsymbol; i++){ int len; struct symbol *sp = lemp->symbols[i]; if( sp->datatype==0 ) continue; - len = strlen(sp->datatype); + len = lemonStrlen(sp->datatype); if( len>maxdtlength ) maxdtlength = len; } stddt = (char*)malloc( maxdtlength*2 + 1 ); - if( types==0 || stddt==0 ){ + if( stddt==0 ){ fprintf(stderr,"Out of memory.\n"); exit(1); } @@ -3112,13 +4149,17 @@ int mhflag; /* True if generating makeheaders output */ cp = sp->datatype; if( cp==0 ) cp = lemp->vartype; j = 0; - while( isspace(*cp) ) cp++; + while( ISSPACE(*cp) ) cp++; while( *cp ) stddt[j++] = *cp++; - while( j>0 && isspace(stddt[j-1]) ) j--; + while( j>0 && ISSPACE(stddt[j-1]) ) j--; stddt[j] = 0; + if( lemp->tokentype && strcmp(stddt, lemp->tokentype)==0 ){ + sp->dtnum = 0; + continue; + } hash = 0; for(j=0; stddt[j]; j++){ - hash = (unsigned int)hash*53u + (unsigned int) stddt[j]; + hash = hash*53 + stddt[j]; } hash = (hash & 0x7fffffff)%arraysize; while( types[hash] ){ @@ -3127,16 +4168,16 @@ int mhflag; /* True if generating makeheaders output */ break; } hash++; - if( hash>=arraysize ) hash = 0; + if( hash>=(unsigned)arraysize ) hash = 0; } if( types[hash]==0 ){ sp->dtnum = hash + 1; - types[hash] = (char*)malloc( strlen(stddt)+1 ); + types[hash] = (char*)malloc( lemonStrlen(stddt)+1 ); if( types[hash]==0 ){ fprintf(stderr,"Out of memory.\n"); exit(1); } - strcpy(types[hash],stddt); + lemon_strcpy(types[hash],stddt); } } @@ -3148,13 +4189,16 @@ int mhflag; /* True if generating makeheaders output */ lemp->tokentype?lemp->tokentype:"void*"); lineno++; if( mhflag ){ fprintf(out,"#endif\n"); lineno++; } fprintf(out,"typedef union {\n"); lineno++; + fprintf(out," int yyinit;\n"); lineno++; fprintf(out," %sTOKENTYPE yy0;\n",name); lineno++; for(i=0; i<arraysize; i++){ if( types[i]==0 ) continue; fprintf(out," %s yy%d;\n",types[i],i+1); lineno++; free(types[i]); } - fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++; + if( lemp->errsym && lemp->errsym->useCnt ){ + fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++; + } free(stddt); free(types); fprintf(out,"} YYMINORTYPE;\n"); lineno++; @@ -3163,24 +4207,32 @@ int mhflag; /* True if generating makeheaders output */ /* ** Return the name of a C datatype able to represent values between -** lwr and upr, inclusive. +** lwr and upr, inclusive. If pnByte!=NULL then also write the sizeof +** for that type (1, 2, or 4) into *pnByte. */ -static const char *minimum_size_type(int lwr, int upr){ +static const char *minimum_size_type(int lwr, int upr, int *pnByte){ + const char *zType = "int"; + int nByte = 4; if( lwr>=0 ){ if( upr<=255 ){ - return "unsigned char"; + zType = "unsigned char"; + nByte = 1; }else if( upr<65535 ){ - return "unsigned short int"; + zType = "unsigned short int"; + nByte = 2; }else{ - return "unsigned int"; + zType = "unsigned int"; + nByte = 4; } }else if( lwr>=-127 && upr<=127 ){ - return "signed char"; + zType = "signed char"; + nByte = 1; }else if( lwr>=-32767 && upr<32767 ){ - return "short"; - }else{ - return "int"; + zType = "short"; + nByte = 2; } + if( pnByte ) *pnByte = nByte; + return zType; } /* @@ -3193,6 +4245,7 @@ struct axset { struct state *stp; /* A pointer to a state */ int isTkn; /* True to use tokens. False for non-terminals */ int nAction; /* Number of actions */ + int iOrder; /* Original order of action sets */ }; /* @@ -3201,147 +4254,270 @@ struct axset { static int axset_compare(const void *a, const void *b){ struct axset *p1 = (struct axset*)a; struct axset *p2 = (struct axset*)b; - return p2->nAction - p1->nAction; + int c; + c = p2->nAction - p1->nAction; + if( c==0 ){ + c = p1->iOrder - p2->iOrder; + } + assert( c!=0 || p1==p2 ); + return c; } +/* +** Write text on "out" that describes the rule "rp". +*/ +static void writeRuleText(FILE *out, struct rule *rp){ + int j; + fprintf(out,"%s ::=", rp->lhs->name); + for(j=0; j<rp->nrhs; j++){ + struct symbol *sp = rp->rhs[j]; + if( sp->type!=MULTITERMINAL ){ + fprintf(out," %s", sp->name); + }else{ + int k; + fprintf(out," %s", sp->subsym[0]->name); + for(k=1; k<sp->nsubsym; k++){ + fprintf(out,"|%s",sp->subsym[k]->name); + } + } + } +} + + /* Generate C source code for the parser */ -void ReportTable(lemp, mhflag) -struct lemon *lemp; -int mhflag; /* Output in makeheaders format if true */ -{ - FILE *out, *in; - char line[LINESIZE]; +void ReportTable( + struct lemon *lemp, + int mhflag, /* Output in makeheaders format if true */ + int sqlFlag /* Generate the *.sql file too */ +){ + FILE *out, *in, *sql; int lineno; struct state *stp; struct action *ap; struct rule *rp; struct acttab *pActtab; - int i, j, n; + int i, j, n, sz; + int nLookAhead; + int szActionType; /* sizeof(YYACTIONTYPE) */ + int szCodeType; /* sizeof(YYCODETYPE) */ + const char *name; int mnTknOfst, mxTknOfst; int mnNtOfst, mxNtOfst; struct axset *ax; - char *name; + char *prefix; + + lemp->minShiftReduce = lemp->nstate; + lemp->errAction = lemp->minShiftReduce + lemp->nrule; + lemp->accAction = lemp->errAction + 1; + lemp->noAction = lemp->accAction + 1; + lemp->minReduce = lemp->noAction + 1; + lemp->maxAction = lemp->minReduce + lemp->nrule; in = tplt_open(lemp); if( in==0 ) return; - out = file_open(lemp,".c","w"); + out = file_open(lemp,".c","wb"); if( out==0 ){ fclose(in); return; } + if( sqlFlag==0 ){ + sql = 0; + }else{ + sql = file_open(lemp, ".sql", "wb"); + if( sql==0 ){ + fclose(in); + fclose(out); + return; + } + fprintf(sql, + "BEGIN;\n" + "CREATE TABLE symbol(\n" + " id INTEGER PRIMARY KEY,\n" + " name TEXT NOT NULL,\n" + " isTerminal BOOLEAN NOT NULL,\n" + " fallback INTEGER REFERENCES symbol" + " DEFERRABLE INITIALLY DEFERRED\n" + ");\n" + ); + for(i=0; i<lemp->nsymbol; i++){ + fprintf(sql, + "INSERT INTO symbol(id,name,isTerminal,fallback)" + "VALUES(%d,'%s',%s", + i, lemp->symbols[i]->name, + i<lemp->nterminal ? "TRUE" : "FALSE" + ); + if( lemp->symbols[i]->fallback ){ + fprintf(sql, ",%d);\n", lemp->symbols[i]->fallback->index); + }else{ + fprintf(sql, ",NULL);\n"); + } + } + fprintf(sql, + "CREATE TABLE rule(\n" + " ruleid INTEGER PRIMARY KEY,\n" + " lhs INTEGER REFERENCES symbol(id),\n" + " txt TEXT\n" + ");\n" + "CREATE TABLE rulerhs(\n" + " ruleid INTEGER REFERENCES rule(ruleid),\n" + " pos INTEGER,\n" + " sym INTEGER REFERENCES symbol(id)\n" + ");\n" + ); + for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ + assert( i==rp->iRule ); + fprintf(sql, + "INSERT INTO rule(ruleid,lhs,txt)VALUES(%d,%d,'", + rp->iRule, rp->lhs->index + ); + writeRuleText(sql, rp); + fprintf(sql,"');\n"); + for(j=0; j<rp->nrhs; j++){ + struct symbol *sp = rp->rhs[j]; + if( sp->type!=MULTITERMINAL ){ + fprintf(sql, + "INSERT INTO rulerhs(ruleid,pos,sym)VALUES(%d,%d,%d);\n", + i,j,sp->index + ); + }else{ + int k; + for(k=0; k<sp->nsubsym; k++){ + fprintf(sql, + "INSERT INTO rulerhs(ruleid,pos,sym)VALUES(%d,%d,%d);\n", + i,j,sp->subsym[k]->index + ); + } + } + } + } + fprintf(sql, "COMMIT;\n"); + } lineno = 1; - tplt_xfer(lemp->name,in,out,&lineno); + + fprintf(out, + "/* This file is automatically generated by Lemon from input grammar\n" + "** source file \"%s\". */\n", lemp->filename); lineno += 2; + + /* The first %include directive begins with a C-language comment, + ** then skip over the header comment of the template file + */ + if( lemp->include==0 ) lemp->include = ""; + for(i=0; ISSPACE(lemp->include[i]); i++){ + if( lemp->include[i]=='\n' ){ + lemp->include += i+1; + i = -1; + } + } + if( lemp->include[0]=='/' ){ + tplt_skip_header(in,&lineno); + }else{ + tplt_xfer(lemp->name,in,out,&lineno); + } /* Generate the include code, if any */ - tplt_print(out,lemp,lemp->include,lemp->includeln,&lineno); + tplt_print(out,lemp,lemp->include,&lineno); if( mhflag ){ - name = file_makename(lemp, ".h"); - fprintf(out,"#include \"%s\"\n", name); lineno++; - free(name); + char *incName = file_makename(lemp, ".h"); + fprintf(out,"#include \"%s\"\n", incName); lineno++; + free(incName); } tplt_xfer(lemp->name,in,out,&lineno); /* Generate #defines for all tokens */ + if( lemp->tokenprefix ) prefix = lemp->tokenprefix; + else prefix = ""; if( mhflag ){ - char *prefix; fprintf(out,"#if INTERFACE\n"); lineno++; - if( lemp->tokenprefix ) prefix = lemp->tokenprefix; - else prefix = ""; - for(i=1; i<lemp->nterminal; i++){ - fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); - lineno++; - } - fprintf(out,"#endif\n"); lineno++; + }else{ + fprintf(out,"#ifndef %s%s\n", prefix, lemp->symbols[1]->name); } + for(i=1; i<lemp->nterminal; i++){ + fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); + lineno++; + } + fprintf(out,"#endif\n"); lineno++; tplt_xfer(lemp->name,in,out,&lineno); /* Generate the defines */ - fprintf(out,"/* \001 */\n"); fprintf(out,"#define YYCODETYPE %s\n", - minimum_size_type(0, lemp->nsymbol+5)); lineno++; - fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++; + minimum_size_type(0, lemp->nsymbol, &szCodeType)); lineno++; + fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol); lineno++; fprintf(out,"#define YYACTIONTYPE %s\n", - minimum_size_type(0, lemp->nstate+lemp->nrule+5)); lineno++; + minimum_size_type(0,lemp->maxAction,&szActionType)); lineno++; + if( lemp->wildcard ){ + fprintf(out,"#define YYWILDCARD %d\n", + lemp->wildcard->index); lineno++; + } print_stack_union(out,lemp,&lineno,mhflag); + fprintf(out, "#ifndef YYSTACKDEPTH\n"); lineno++; if( lemp->stacksize ){ - if( atoi(lemp->stacksize)<=0 ){ - ErrorMsg(lemp->filename,0, -"Illegal stack size: [%s]. The stack size should be an integer constant.", - lemp->stacksize); - lemp->errorcnt++; - lemp->stacksize = "100"; - } fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize); lineno++; }else{ fprintf(out,"#define YYSTACKDEPTH 100\n"); lineno++; } + fprintf(out, "#endif\n"); lineno++; if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; } name = lemp->name ? lemp->name : "Parse"; if( lemp->arg && lemp->arg[0] ){ - i = strlen(lemp->arg); - while( i>=1 && isspace(lemp->arg[i-1]) ) i--; - while( i>=1 && (isalnum(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--; + i = lemonStrlen(lemp->arg); + while( i>=1 && ISSPACE(lemp->arg[i-1]) ) i--; + while( i>=1 && (ISALNUM(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--; fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg); lineno++; fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg); lineno++; - fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n", + fprintf(out,"#define %sARG_PARAM ,%s\n",name,&lemp->arg[i]); lineno++; + fprintf(out,"#define %sARG_FETCH %s=yypParser->%s;\n", name,lemp->arg,&lemp->arg[i]); lineno++; - fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n", + fprintf(out,"#define %sARG_STORE yypParser->%s=%s;\n", name,&lemp->arg[i],&lemp->arg[i]); lineno++; }else{ - fprintf(out,"#define %sARG_SDECL\n",name); lineno++; - fprintf(out,"#define %sARG_PDECL\n",name); lineno++; + fprintf(out,"#define %sARG_SDECL\n",name); lineno++; + fprintf(out,"#define %sARG_PDECL\n",name); lineno++; + fprintf(out,"#define %sARG_PARAM\n",name); lineno++; fprintf(out,"#define %sARG_FETCH\n",name); lineno++; fprintf(out,"#define %sARG_STORE\n",name); lineno++; } + if( lemp->ctx && lemp->ctx[0] ){ + i = lemonStrlen(lemp->ctx); + while( i>=1 && ISSPACE(lemp->ctx[i-1]) ) i--; + while( i>=1 && (ISALNUM(lemp->ctx[i-1]) || lemp->ctx[i-1]=='_') ) i--; + fprintf(out,"#define %sCTX_SDECL %s;\n",name,lemp->ctx); lineno++; + fprintf(out,"#define %sCTX_PDECL ,%s\n",name,lemp->ctx); lineno++; + fprintf(out,"#define %sCTX_PARAM ,%s\n",name,&lemp->ctx[i]); lineno++; + fprintf(out,"#define %sCTX_FETCH %s=yypParser->%s;\n", + name,lemp->ctx,&lemp->ctx[i]); lineno++; + fprintf(out,"#define %sCTX_STORE yypParser->%s=%s;\n", + name,&lemp->ctx[i],&lemp->ctx[i]); lineno++; + }else{ + fprintf(out,"#define %sCTX_SDECL\n",name); lineno++; + fprintf(out,"#define %sCTX_PDECL\n",name); lineno++; + fprintf(out,"#define %sCTX_PARAM\n",name); lineno++; + fprintf(out,"#define %sCTX_FETCH\n",name); lineno++; + fprintf(out,"#define %sCTX_STORE\n",name); lineno++; + } if( mhflag ){ fprintf(out,"#endif\n"); lineno++; } - fprintf(out,"#define YYNSTATE %d\n",lemp->nstate); lineno++; - fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++; - fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++; - fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++; + if( lemp->errsym && lemp->errsym->useCnt ){ + fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++; + fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++; + } if( lemp->has_fallback ){ fprintf(out,"#define YYFALLBACK 1\n"); lineno++; } - tplt_xfer(lemp->name,in,out,&lineno); - /* Generate the action table and its associates: - ** - ** yy_action[] A single table containing all actions. - ** yy_lookahead[] A table containing the lookahead for each entry in - ** yy_action. Used to detect hash collisions. - ** yy_shift_ofst[] For each state, the offset into yy_action for - ** shifting terminals. - ** yy_reduce_ofst[] For each state, the offset into yy_action for - ** shifting non-terminals after a reduce. - ** yy_default[] Default action for each state. + /* Compute the action table, but do not output it yet. The action + ** table must be computed before generating the YYNSTATE macro because + ** we need to know how many states can be eliminated. */ - - /* Compute the actions on all states and count them up */ - ax = malloc( sizeof(ax[0])*lemp->nstate*2 ); + ax = (struct axset *) calloc(lemp->nxstate*2, sizeof(ax[0])); if( ax==0 ){ fprintf(stderr,"malloc failed\n"); exit(1); } - for(i=0; i<lemp->nstate; i++){ + for(i=0; i<lemp->nxstate; i++){ stp = lemp->sorted[i]; - stp->nTknAct = stp->nNtAct = 0; - stp->iDflt = lemp->nstate + lemp->nrule; - stp->iTknOfst = NO_OFFSET; - stp->iNtOfst = NO_OFFSET; - for(ap=stp->ap; ap; ap=ap->next){ - if( compute_action(lemp,ap)>=0 ){ - if( ap->sp->index<lemp->nterminal ){ - stp->nTknAct++; - }else if( ap->sp->index<lemp->nsymbol ){ - stp->nNtAct++; - }else{ - stp->iDflt = compute_action(lemp, ap); - } - } - } ax[i*2].stp = stp; ax[i*2].isTkn = 1; ax[i*2].nAction = stp->nTknAct; @@ -3351,14 +4527,12 @@ int mhflag; /* Output in makeheaders format if true */ } mxTknOfst = mnTknOfst = 0; mxNtOfst = mnNtOfst = 0; - - /* Compute the action table. In order to try to keep the size of the - ** action table to a minimum, the heuristic of placing the largest action - ** sets first is used. - */ - qsort(ax, lemp->nstate*2, sizeof(ax[0]), axset_compare); - pActtab = acttab_alloc(); - for(i=0; i<lemp->nstate*2 && ax[i].nAction>0; i++){ + /* In an effort to minimize the action table size, use the heuristic + ** of placing the largest action sets first */ + for(i=0; i<lemp->nxstate*2; i++) ax[i].iOrder = i; + qsort(ax, lemp->nxstate*2, sizeof(ax[0]), axset_compare); + pActtab = acttab_alloc(lemp->nsymbol, lemp->nterminal); + for(i=0; i<lemp->nxstate*2 && ax[i].nAction>0; i++){ stp = ax[i].stp; if( ax[i].isTkn ){ for(ap=stp->ap; ap; ap=ap->next){ @@ -3368,7 +4542,7 @@ int mhflag; /* Output in makeheaders format if true */ if( action<0 ) continue; acttab_action(pActtab, ap->sp->index, action); } - stp->iTknOfst = acttab_insert(pActtab); + stp->iTknOfst = acttab_insert(pActtab, 1); if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst; if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; }else{ @@ -3380,19 +4554,75 @@ int mhflag; /* Output in makeheaders format if true */ if( action<0 ) continue; acttab_action(pActtab, ap->sp->index, action); } - stp->iNtOfst = acttab_insert(pActtab); + stp->iNtOfst = acttab_insert(pActtab, 0); if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst; if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; } +#if 0 /* Uncomment for a trace of how the yy_action[] table fills out */ + { int jj, nn; + for(jj=nn=0; jj<pActtab->nAction; jj++){ + if( pActtab->aAction[jj].action<0 ) nn++; + } + printf("%4d: State %3d %s n: %2d size: %5d freespace: %d\n", + i, stp->statenum, ax[i].isTkn ? "Token" : "Var ", + ax[i].nAction, pActtab->nAction, nn); + } +#endif } free(ax); + /* Mark rules that are actually used for reduce actions after all + ** optimizations have been applied + */ + for(rp=lemp->rule; rp; rp=rp->next) rp->doesReduce = LEMON_FALSE; + for(i=0; i<lemp->nxstate; i++){ + for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ + if( ap->type==REDUCE || ap->type==SHIFTREDUCE ){ + ap->x.rp->doesReduce = 1; + } + } + } + + /* Finish rendering the constants now that the action table has + ** been computed */ + fprintf(out,"#define YYNSTATE %d\n",lemp->nxstate); lineno++; + fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++; + fprintf(out,"#define YYNRULE_WITH_ACTION %d\n",lemp->nruleWithAction); + lineno++; + fprintf(out,"#define YYNTOKEN %d\n",lemp->nterminal); lineno++; + fprintf(out,"#define YY_MAX_SHIFT %d\n",lemp->nxstate-1); lineno++; + i = lemp->minShiftReduce; + fprintf(out,"#define YY_MIN_SHIFTREDUCE %d\n",i); lineno++; + i += lemp->nrule; + fprintf(out,"#define YY_MAX_SHIFTREDUCE %d\n", i-1); lineno++; + fprintf(out,"#define YY_ERROR_ACTION %d\n", lemp->errAction); lineno++; + fprintf(out,"#define YY_ACCEPT_ACTION %d\n", lemp->accAction); lineno++; + fprintf(out,"#define YY_NO_ACTION %d\n", lemp->noAction); lineno++; + fprintf(out,"#define YY_MIN_REDUCE %d\n", lemp->minReduce); lineno++; + i = lemp->minReduce + lemp->nrule; + fprintf(out,"#define YY_MAX_REDUCE %d\n", i-1); lineno++; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Now output the action table and its associates: + ** + ** yy_action[] A single table containing all actions. + ** yy_lookahead[] A table containing the lookahead for each entry in + ** yy_action. Used to detect hash collisions. + ** yy_shift_ofst[] For each state, the offset into yy_action for + ** shifting terminals. + ** yy_reduce_ofst[] For each state, the offset into yy_action for + ** shifting non-terminals after a reduce. + ** yy_default[] Default action for each state. + */ + /* Output the yy_action table */ - fprintf(out,"static YYACTIONTYPE yy_action[] = {\n"); lineno++; - n = acttab_size(pActtab); + lemp->nactiontab = n = acttab_action_size(pActtab); + lemp->tablesize += n*szActionType; + fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++; + fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++; for(i=j=0; i<n; i++){ int action = acttab_yyaction(pActtab, i); - if( action<0 ) action = lemp->nsymbol + lemp->nrule + 2; + if( action<0 ) action = lemp->noAction; if( j==0 ) fprintf(out," /* %5d */ ", i); fprintf(out, " %4d,", action); if( j==9 || i==n-1 ){ @@ -3405,31 +4635,54 @@ int mhflag; /* Output in makeheaders format if true */ fprintf(out, "};\n"); lineno++; /* Output the yy_lookahead table */ - fprintf(out,"static YYCODETYPE yy_lookahead[] = {\n"); lineno++; + lemp->nlookaheadtab = n = acttab_lookahead_size(pActtab); + lemp->tablesize += n*szCodeType; + fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++; for(i=j=0; i<n; i++){ int la = acttab_yylookahead(pActtab, i); if( la<0 ) la = lemp->nsymbol; if( j==0 ) fprintf(out," /* %5d */ ", i); fprintf(out, " %4d,", la); - if( j==9 || i==n-1 ){ + if( j==9 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + /* Add extra entries to the end of the yy_lookahead[] table so that + ** yy_shift_ofst[]+iToken will always be a valid index into the array, + ** even for the largest possible value of yy_shift_ofst[] and iToken. */ + nLookAhead = lemp->nterminal + lemp->nactiontab; + while( i<nLookAhead ){ + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", lemp->nterminal); + if( j==9 ){ fprintf(out, "\n"); lineno++; j = 0; }else{ j++; } + i++; } + if( j>0 ){ fprintf(out, "\n"); lineno++; } fprintf(out, "};\n"); lineno++; /* Output the yy_shift_ofst[] table */ - fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", mnTknOfst-1); lineno++; - fprintf(out, "static %s yy_shift_ofst[] = {\n", - minimum_size_type(mnTknOfst-1, mxTknOfst)); lineno++; - n = lemp->nstate; + n = lemp->nxstate; + while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; + fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++; + fprintf(out, "#define YY_SHIFT_MIN (%d)\n", mnTknOfst); lineno++; + fprintf(out, "#define YY_SHIFT_MAX (%d)\n", mxTknOfst); lineno++; + fprintf(out, "static const %s yy_shift_ofst[] = {\n", + minimum_size_type(mnTknOfst, lemp->nterminal+lemp->nactiontab, &sz)); + lineno++; + lemp->tablesize += n*sz; for(i=j=0; i<n; i++){ int ofst; stp = lemp->sorted[i]; ofst = stp->iTknOfst; - if( ofst==NO_OFFSET ) ofst = mnTknOfst - 1; + if( ofst==NO_OFFSET ) ofst = lemp->nactiontab; if( j==0 ) fprintf(out," /* %5d */ ", i); fprintf(out, " %4d,", ofst); if( j==9 || i==n-1 ){ @@ -3442,10 +4695,14 @@ int mhflag; /* Output in makeheaders format if true */ fprintf(out, "};\n"); lineno++; /* Output the yy_reduce_ofst[] table */ - fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++; - fprintf(out, "static %s yy_reduce_ofst[] = {\n", - minimum_size_type(mnNtOfst-1, mxNtOfst)); lineno++; - n = lemp->nstate; + n = lemp->nxstate; + while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; + fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); lineno++; + fprintf(out, "#define YY_REDUCE_MIN (%d)\n", mnNtOfst); lineno++; + fprintf(out, "#define YY_REDUCE_MAX (%d)\n", mxNtOfst); lineno++; + fprintf(out, "static const %s yy_reduce_ofst[] = {\n", + minimum_size_type(mnNtOfst-1, mxNtOfst, &sz)); lineno++; + lemp->tablesize += n*sz; for(i=j=0; i<n; i++){ int ofst; stp = lemp->sorted[i]; @@ -3463,12 +4720,17 @@ int mhflag; /* Output in makeheaders format if true */ fprintf(out, "};\n"); lineno++; /* Output the default action table */ - fprintf(out, "static YYACTIONTYPE yy_default[] = {\n"); lineno++; - n = lemp->nstate; + fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++; + n = lemp->nxstate; + lemp->tablesize += n*szActionType; for(i=j=0; i<n; i++){ stp = lemp->sorted[i]; if( j==0 ) fprintf(out," /* %5d */ ", i); - fprintf(out, " %4d,", stp->iDflt); + if( stp->iDfltReduce<0 ){ + fprintf(out, " %4d,", lemp->errAction); + }else{ + fprintf(out, " %4d,", stp->iDfltReduce + lemp->minReduce); + } if( j==9 || i==n-1 ){ fprintf(out, "\n"); lineno++; j = 0; @@ -3482,7 +4744,12 @@ int mhflag; /* Output in makeheaders format if true */ /* Generate the table of fallback tokens. */ if( lemp->has_fallback ){ - for(i=0; i<lemp->nterminal; i++){ + int mx = lemp->nterminal - 1; + /* 2019-08-28: Generate fallback entries for every token to avoid + ** having to do a range check on the index */ + /* while( mx>0 && lemp->symbols[mx]->fallback==0 ){ mx--; } */ + lemp->tablesize += (mx+1)*szCodeType; + for(i=0; i<=mx; i++){ struct symbol *p = lemp->symbols[i]; if( p->fallback==0 ){ fprintf(out, " 0, /* %10s => nothing */\n", p->name); @@ -3498,11 +4765,8 @@ int mhflag; /* Output in makeheaders format if true */ /* Generate a table containing the symbolic name of every symbol */ for(i=0; i<lemp->nsymbol; i++){ - sprintf(line,"\"%s\",",lemp->symbols[i]->name); - fprintf(out," %-15s",line); - if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; } + fprintf(out," /* %4d */ \"%s\",\n",i, lemp->symbols[i]->name); lineno++; } - if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); /* Generate a table containing a text string that describes every @@ -3510,9 +4774,9 @@ int mhflag; /* Output in makeheaders format if true */ ** when tracing REDUCE actions. */ for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ - assert( rp->index==i ); - fprintf(out," /* %3d */ \"%s ::=", i, rp->lhs->name); - for(j=0; j<rp->nrhs; j++) fprintf(out," %s",rp->rhs[j]->name); + assert( rp->iRule==i ); + fprintf(out," /* %3d */ \"", i); + writeRuleText(out, rp); fprintf(out,"\",\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); @@ -3522,10 +4786,15 @@ int mhflag; /* Output in makeheaders format if true */ ** (In other words, generate the %destructor actions) */ if( lemp->tokendest ){ + int once = 1; for(i=0; i<lemp->nsymbol; i++){ struct symbol *sp = lemp->symbols[i]; if( sp==0 || sp->type!=TERMINAL ) continue; - fprintf(out," case %d:\n",sp->index); lineno++; + if( once ){ + fprintf(out, " /* TERMINAL Destructor */\n"); lineno++; + once = 0; + } + fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; } for(i=0; i<lemp->nsymbol && lemp->symbols[i]->type!=TERMINAL; i++); if( i<lemp->nsymbol ){ @@ -3533,100 +4802,176 @@ int mhflag; /* Output in makeheaders format if true */ fprintf(out," break;\n"); lineno++; } } - for(i=0; i<lemp->nsymbol; i++){ - struct symbol *sp = lemp->symbols[i]; - if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue; - fprintf(out," case %d:\n",sp->index); lineno++; - emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); - fprintf(out," break;\n"); lineno++; - } if( lemp->vardest ){ struct symbol *dflt_sp = 0; + int once = 1; for(i=0; i<lemp->nsymbol; i++){ struct symbol *sp = lemp->symbols[i]; if( sp==0 || sp->type==TERMINAL || sp->index<=0 || sp->destructor!=0 ) continue; - fprintf(out," case %d:\n",sp->index); lineno++; + if( once ){ + fprintf(out, " /* Default NON-TERMINAL Destructor */\n");lineno++; + once = 0; + } + fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; dflt_sp = sp; } if( dflt_sp!=0 ){ emit_destructor_code(out,dflt_sp,lemp,&lineno); - fprintf(out," break;\n"); lineno++; } + fprintf(out," break;\n"); lineno++; + } + for(i=0; i<lemp->nsymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue; + if( sp->destLineno<0 ) continue; /* Already emitted */ + fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; + + /* Combine duplicate destructors into a single case */ + for(j=i+1; j<lemp->nsymbol; j++){ + struct symbol *sp2 = lemp->symbols[j]; + if( sp2 && sp2->type!=TERMINAL && sp2->destructor + && sp2->dtnum==sp->dtnum + && strcmp(sp->destructor,sp2->destructor)==0 ){ + fprintf(out," case %d: /* %s */\n", + sp2->index, sp2->name); lineno++; + sp2->destLineno = -1; /* Avoid emitting this destructor again */ + } + } + + emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); + fprintf(out," break;\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which executes whenever the parser stack overflows */ - tplt_print(out,lemp,lemp->overflow,lemp->overflowln,&lineno); + tplt_print(out,lemp,lemp->overflow,&lineno); tplt_xfer(lemp->name,in,out,&lineno); - /* Generate the table of rule information + /* Generate the tables of rule information. yyRuleInfoLhs[] and + ** yyRuleInfoNRhs[]. ** ** Note: This code depends on the fact that rules are number - ** sequentually beginning with 0. + ** sequentially beginning with 0. */ - for(rp=lemp->rule; rp; rp=rp->next){ - fprintf(out," { %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++; + for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ + fprintf(out," %4d, /* (%d) ", rp->lhs->index, i); + rule_print(out, rp); + fprintf(out," */\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ + fprintf(out," %3d, /* (%d) ", -rp->nrhs, i); + rule_print(out, rp); + fprintf(out," */\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which execution during each REDUCE action */ + i = 0; + for(rp=lemp->rule; rp; rp=rp->next){ + i += translate_code(lemp, rp); + } + if( i ){ + fprintf(out," YYMINORTYPE yylhsminor;\n"); lineno++; + } + /* First output rules other than the default: rule */ for(rp=lemp->rule; rp; rp=rp->next){ - fprintf(out," case %d:\n",rp->index); lineno++; + struct rule *rp2; /* Other rules with the same action */ + if( rp->codeEmitted ) continue; + if( rp->noCode ){ + /* No C code actions, so this will be part of the "default:" rule */ + continue; + } + fprintf(out," case %d: /* ", rp->iRule); + writeRuleText(out, rp); + fprintf(out, " */\n"); lineno++; + for(rp2=rp->next; rp2; rp2=rp2->next){ + if( rp2->code==rp->code && rp2->codePrefix==rp->codePrefix + && rp2->codeSuffix==rp->codeSuffix ){ + fprintf(out," case %d: /* ", rp2->iRule); + writeRuleText(out, rp2); + fprintf(out," */ yytestcase(yyruleno==%d);\n", rp2->iRule); lineno++; + rp2->codeEmitted = 1; + } + } emit_code(out,rp,lemp,&lineno); fprintf(out," break;\n"); lineno++; + rp->codeEmitted = 1; + } + /* Finally, output the default: rule. We choose as the default: all + ** empty actions. */ + fprintf(out," default:\n"); lineno++; + for(rp=lemp->rule; rp; rp=rp->next){ + if( rp->codeEmitted ) continue; + assert( rp->noCode ); + fprintf(out," /* (%d) ", rp->iRule); + writeRuleText(out, rp); + if( rp->neverReduce ){ + fprintf(out, " (NEVER REDUCES) */ assert(yyruleno!=%d);\n", + rp->iRule); lineno++; + }else if( rp->doesReduce ){ + fprintf(out, " */ yytestcase(yyruleno==%d);\n", rp->iRule); lineno++; + }else{ + fprintf(out, " (OPTIMIZED OUT) */ assert(yyruleno!=%d);\n", + rp->iRule); lineno++; + } } + fprintf(out," break;\n"); lineno++; tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which executes if a parse fails */ - tplt_print(out,lemp,lemp->failure,lemp->failureln,&lineno); + tplt_print(out,lemp,lemp->failure,&lineno); tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which executes when a syntax error occurs */ - tplt_print(out,lemp,lemp->error,lemp->errorln,&lineno); + tplt_print(out,lemp,lemp->error,&lineno); tplt_xfer(lemp->name,in,out,&lineno); /* Generate code which executes when the parser accepts its input */ - tplt_print(out,lemp,lemp->accept,lemp->acceptln,&lineno); + tplt_print(out,lemp,lemp->accept,&lineno); tplt_xfer(lemp->name,in,out,&lineno); /* Append any addition code the user desires */ - tplt_print(out,lemp,lemp->extracode,lemp->extracodeln,&lineno); + tplt_print(out,lemp,lemp->extracode,&lineno); acttab_free(pActtab); fclose(in); fclose(out); + if( sql ) fclose(sql); return; } /* Generate a header file for the parser */ -void ReportHeader(lemp) -struct lemon *lemp; +void ReportHeader(struct lemon *lemp) { FILE *out, *in; - char *prefix; + const char *prefix; char line[LINESIZE]; char pattern[LINESIZE]; int i; if( lemp->tokenprefix ) prefix = lemp->tokenprefix; else prefix = ""; - in = file_open(lemp,".h","r"); + in = file_open(lemp,".h","rb"); if( in ){ + int nextChar; for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){ - sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); + lemon_sprintf(pattern,"#define %s%-30s %3d\n", + prefix,lemp->symbols[i]->name,i); if( strcmp(line,pattern) ) break; } + nextChar = fgetc(in); fclose(in); - if( i==lemp->nterminal ){ + if( i==lemp->nterminal && nextChar==EOF ){ /* No change in the file. Don't rewrite it. */ return; } } - out = file_open(lemp,".h","w"); + out = file_open(lemp,".h","wb"); if( out ){ for(i=1; i<lemp->nterminal; i++){ - fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); + fprintf(out,"#define %s%-30s %3d\n",prefix,lemp->symbols[i]->name,i); } fclose(out); } @@ -3637,25 +4982,31 @@ struct lemon *lemp; ** of defaults. ** ** In this version, we take the most frequent REDUCE action and make -** it the default. Only default a reduce if there are more than one. +** it the default. Except, there is no default if the wildcard token +** is a possible look-ahead. */ -void CompressTables(lemp) -struct lemon *lemp; +void CompressTables(struct lemon *lemp) { struct state *stp; - struct action *ap, *ap2; + struct action *ap, *ap2, *nextap; struct rule *rp, *rp2, *rbest; int nbest, n; int i; + int usesWildcard; for(i=0; i<lemp->nstate; i++){ stp = lemp->sorted[i]; nbest = 0; rbest = 0; + usesWildcard = 0; for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==SHIFT && ap->sp==lemp->wildcard ){ + usesWildcard = 1; + } if( ap->type!=REDUCE ) continue; rp = ap->x.rp; + if( rp->lhsStart ) continue; if( rp==rbest ) continue; n = 1; for(ap2=ap->next; ap2; ap2=ap2->next){ @@ -3671,8 +5022,10 @@ struct lemon *lemp; } /* Do not make a default if the number of rules to default - ** is not at least 2 */ - if( nbest<2 ) continue; + ** is not at least 1 or if the wildcard token is a possible + ** lookahead. + */ + if( nbest<1 || usesWildcard ) continue; /* Combine matching REDUCE actions into a single default */ @@ -3685,62 +5038,177 @@ struct lemon *lemp; if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED; } stp->ap = Action_sort(stp->ap); + + for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==SHIFT ) break; + if( ap->type==REDUCE && ap->x.rp!=rbest ) break; + } + if( ap==0 ){ + stp->autoReduce = 1; + stp->pDfltReduce = rbest; + } + } + + /* Make a second pass over all states and actions. Convert + ** every action that is a SHIFT to an autoReduce state into + ** a SHIFTREDUCE action. + */ + for(i=0; i<lemp->nstate; i++){ + stp = lemp->sorted[i]; + for(ap=stp->ap; ap; ap=ap->next){ + struct state *pNextState; + if( ap->type!=SHIFT ) continue; + pNextState = ap->x.stp; + if( pNextState->autoReduce && pNextState->pDfltReduce!=0 ){ + ap->type = SHIFTREDUCE; + ap->x.rp = pNextState->pDfltReduce; + } + } + } + + /* If a SHIFTREDUCE action specifies a rule that has a single RHS term + ** (meaning that the SHIFTREDUCE will land back in the state where it + ** started) and if there is no C-code associated with the reduce action, + ** then we can go ahead and convert the action to be the same as the + ** action for the RHS of the rule. + */ + for(i=0; i<lemp->nstate; i++){ + stp = lemp->sorted[i]; + for(ap=stp->ap; ap; ap=nextap){ + nextap = ap->next; + if( ap->type!=SHIFTREDUCE ) continue; + rp = ap->x.rp; + if( rp->noCode==0 ) continue; + if( rp->nrhs!=1 ) continue; +#if 1 + /* Only apply this optimization to non-terminals. It would be OK to + ** apply it to terminal symbols too, but that makes the parser tables + ** larger. */ + if( ap->sp->index<lemp->nterminal ) continue; +#endif + /* If we reach this point, it means the optimization can be applied */ + nextap = ap; + for(ap2=stp->ap; ap2 && (ap2==ap || ap2->sp!=rp->lhs); ap2=ap2->next){} + assert( ap2!=0 ); + ap->spOpt = ap2->sp; + ap->type = ap2->type; + ap->x = ap2->x; + } } } + +/* +** Compare two states for sorting purposes. The smaller state is the +** one with the most non-terminal actions. If they have the same number +** of non-terminal actions, then the smaller is the one with the most +** token actions. +*/ +static int stateResortCompare(const void *a, const void *b){ + const struct state *pA = *(const struct state**)a; + const struct state *pB = *(const struct state**)b; + int n; + + n = pB->nNtAct - pA->nNtAct; + if( n==0 ){ + n = pB->nTknAct - pA->nTknAct; + if( n==0 ){ + n = pB->statenum - pA->statenum; + } + } + assert( n!=0 ); + return n; +} + + +/* +** Renumber and resort states so that states with fewer choices +** occur at the end. Except, keep state 0 as the first state. +*/ +void ResortStates(struct lemon *lemp) +{ + int i; + struct state *stp; + struct action *ap; + + for(i=0; i<lemp->nstate; i++){ + stp = lemp->sorted[i]; + stp->nTknAct = stp->nNtAct = 0; + stp->iDfltReduce = -1; /* Init dflt action to "syntax error" */ + stp->iTknOfst = NO_OFFSET; + stp->iNtOfst = NO_OFFSET; + for(ap=stp->ap; ap; ap=ap->next){ + int iAction = compute_action(lemp,ap); + if( iAction>=0 ){ + if( ap->sp->index<lemp->nterminal ){ + stp->nTknAct++; + }else if( ap->sp->index<lemp->nsymbol ){ + stp->nNtAct++; + }else{ + assert( stp->autoReduce==0 || stp->pDfltReduce==ap->x.rp ); + stp->iDfltReduce = iAction; + } + } + } + } + qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]), + stateResortCompare); + for(i=0; i<lemp->nstate; i++){ + lemp->sorted[i]->statenum = i; + } + lemp->nxstate = lemp->nstate; + while( lemp->nxstate>1 && lemp->sorted[lemp->nxstate-1]->autoReduce ){ + lemp->nxstate--; + } +} + + /***************** From the file "set.c" ************************************/ /* ** Set manipulation routines for the LEMON parser generator. */ -static int global_size = 0; +static int size = 0; /* Set the set size */ -void SetSize(n) -int n; +void SetSize(int n) { - global_size = n+1; + size = n+1; } /* Allocate a new set */ -char *SetNew(){ +char *SetNew(void){ char *s; - int i; - s = (char*)malloc( global_size ); + s = (char*)calloc( size, 1); if( s==0 ){ memory_error(); } - for(i=0; i<global_size; i++) s[i] = 0; return s; } /* Deallocate a set */ -void SetFree(s) -char *s; +void SetFree(char *s) { free(s); } /* Add a new element to the set. Return TRUE if the element was added ** and FALSE if it was already there. */ -int SetAdd(s,e) -char *s; -int e; +int SetAdd(char *s, int e) { int rv; + assert( e>=0 && e<size ); rv = s[e]; s[e] = 1; return !rv; } /* Add every element of s2 to s1. Return TRUE if s1 changes. */ -int SetUnion(s1,s2) -char *s1; -char *s2; +int SetUnion(char *s1, char *s2) { int i, progress; progress = 0; - for(i=0; i<global_size; i++){ + for(i=0; i<size; i++){ if( s2[i]==0 ) continue; if( s1[i]==0 ){ progress = 1; @@ -3762,11 +5230,10 @@ char *s2; ** Code for processing tables in the LEMON parser generator. */ -PRIVATE int strhash(x) -char *x; +PRIVATE unsigned strhash(const char *x) { - unsigned int h = 0; - while( *x) h = h*13u + (unsigned int) *(x++); + unsigned h = 0; + while( *x ) h = h*13 + *(x++); return h; } @@ -3774,14 +5241,16 @@ char *x; ** keep strings in a table so that the same string is not in more ** than one place. */ -char *Strsafe(y) -char *y; +const char *Strsafe(const char *y) { - char *z; + const char *z; + char *cpy; + if( y==0 ) return 0; z = Strsafe_find(y); - if( z==0 && (z=malloc( strlen(y)+1 ))!=0 ){ - strcpy(z,y); + if( z==0 && (cpy=(char *)malloc( lemonStrlen(y)+1 ))!=0 ){ + lemon_strcpy(cpy,y); + z = cpy; Strsafe_insert(z); } MemoryCheck(z); @@ -3804,7 +5273,7 @@ struct s_x1 { ** in an associative array of type "x1". */ typedef struct s_x1node { - char *data; /* The data */ + const char *data; /* The data */ struct s_x1node *next; /* Next entry with the same hash */ struct s_x1node **from; /* Previous link */ } x1node; @@ -3813,14 +5282,13 @@ typedef struct s_x1node { static struct s_x1 *x1a; /* Allocate a new associative array */ -void Strsafe_init(){ +void Strsafe_init(void){ if( x1a ) return; x1a = (struct s_x1*)malloc( sizeof(struct s_x1) ); if( x1a ){ x1a->size = 1024; x1a->count = 0; - x1a->tbl = (x1node*)malloc( - (sizeof(x1node) + sizeof(x1node*))*1024 ); + x1a->tbl = (x1node*)calloc(1024, sizeof(x1node) + sizeof(x1node*)); if( x1a->tbl==0 ){ free(x1a); x1a = 0; @@ -3833,12 +5301,11 @@ void Strsafe_init(){ } /* Insert a new record into the array. Return TRUE if successful. ** Prior data with the same key is NOT overwritten */ -int Strsafe_insert(data) -char *data; +int Strsafe_insert(const char *data) { x1node *np; - int h; - int ph; + unsigned h; + unsigned ph; if( x1a==0 ) return 0; ph = strhash(data); @@ -3854,19 +5321,18 @@ char *data; } if( x1a->count>=x1a->size ){ /* Need to make the hash table bigger */ - int i,size; + int i,arrSize; struct s_x1 array; - array.size = size = x1a->size*2; + array.size = arrSize = x1a->size*2; array.count = x1a->count; - array.tbl = (x1node*)malloc( - (sizeof(x1node) + sizeof(x1node*))*size ); + array.tbl = (x1node*)calloc(arrSize, sizeof(x1node) + sizeof(x1node*)); if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ - array.ht = (x1node**)&(array.tbl[size]); - for(i=0; i<size; i++) array.ht[i] = 0; + array.ht = (x1node**)&(array.tbl[arrSize]); + for(i=0; i<arrSize; i++) array.ht[i] = 0; for(i=0; i<x1a->count; i++){ x1node *oldnp, *newnp; oldnp = &(x1a->tbl[i]); - h = strhash(oldnp->data) & (size-1); + h = strhash(oldnp->data) & (arrSize-1); newnp = &(array.tbl[i]); if( array.ht[h] ) array.ht[h]->from = &(newnp->next); newnp->next = array.ht[h]; @@ -3874,9 +5340,9 @@ char *data; newnp->from = &(array.ht[h]); array.ht[h] = newnp; } - free(x1a->tbl); - /* *x1a = array; *//* copy 'array' */ - memcpy(x1a, &array, sizeof(array)); + /* free(x1a->tbl); // This program was originally for 16-bit machines. + ** Don't worry about freeing memory on modern platforms. */ + *x1a = array; } /* Insert the new data */ h = ph & (x1a->size-1); @@ -3891,10 +5357,9 @@ char *data; /* Return a pointer to data assigned to the given key. Return NULL ** if no such key. */ -char *Strsafe_find(key) -char *key; +const char *Strsafe_find(const char *key) { - int h; + unsigned h; x1node *np; if( x1a==0 ) return 0; @@ -3910,44 +5375,53 @@ char *key; /* Return a pointer to the (terminal or nonterminal) symbol "x". ** Create a new symbol if this is the first time "x" has been seen. */ -struct symbol *Symbol_new(x) -char *x; +struct symbol *Symbol_new(const char *x) { struct symbol *sp; sp = Symbol_find(x); if( sp==0 ){ - sp = (struct symbol *)malloc( sizeof(struct symbol) ); + sp = (struct symbol *)calloc(1, sizeof(struct symbol) ); MemoryCheck(sp); sp->name = Strsafe(x); - sp->type = isupper(*x) ? TERMINAL : NONTERMINAL; + sp->type = ISUPPER(*x) ? TERMINAL : NONTERMINAL; sp->rule = 0; sp->fallback = 0; sp->prec = -1; sp->assoc = UNK; sp->firstset = 0; - sp->lambda = Bo_FALSE; + sp->lambda = LEMON_FALSE; sp->destructor = 0; + sp->destLineno = 0; sp->datatype = 0; + sp->useCnt = 0; Symbol_insert(sp,sp->name); } + sp->useCnt++; return sp; } -/* Compare two symbols for working purposes +/* Compare two symbols for sorting purposes. Return negative, +** zero, or positive if a is less then, equal to, or greater +** than b. ** ** Symbols that begin with upper case letters (terminals or tokens) ** must sort before symbols that begin with lower case letters -** (non-terminals). Other than that, the order does not matter. +** (non-terminals). And MULTITERMINAL symbols (created using the +** %token_class directive) must sort at the very end. Other than +** that, the order does not matter. ** ** We find experimentally that leaving the symbols in their original ** order (the order they appeared in the grammar file) gives the ** smallest parser tables in SQLite. */ -int Symbolcmpp(struct symbol **a, struct symbol **b){ - int i1 = (**a).index + 10000000*((**a).name[0]>'Z'); - int i2 = (**b).index + 10000000*((**b).name[0]>'Z'); - return i1-i2; +int Symbolcmpp(const void *_a, const void *_b) +{ + const struct symbol *a = *(const struct symbol **) _a; + const struct symbol *b = *(const struct symbol **) _b; + int i1 = a->type==MULTITERMINAL ? 3 : a->name[0]>'Z' ? 2 : 1; + int i2 = b->type==MULTITERMINAL ? 3 : b->name[0]>'Z' ? 2 : 1; + return i1==i2 ? a->index - b->index : i1 - i2; } /* There is one instance of the following structure for each @@ -3966,8 +5440,8 @@ struct s_x2 { ** in an associative array of type "x2". */ typedef struct s_x2node { - struct symbol *data; /* The data */ - char *key; /* The key */ + struct symbol *data; /* The data */ + const char *key; /* The key */ struct s_x2node *next; /* Next entry with the same hash */ struct s_x2node **from; /* Previous link */ } x2node; @@ -3976,14 +5450,13 @@ typedef struct s_x2node { static struct s_x2 *x2a; /* Allocate a new associative array */ -void Symbol_init(){ +void Symbol_init(void){ if( x2a ) return; x2a = (struct s_x2*)malloc( sizeof(struct s_x2) ); if( x2a ){ x2a->size = 128; x2a->count = 0; - x2a->tbl = (x2node*)malloc( - (sizeof(x2node) + sizeof(x2node*))*128 ); + x2a->tbl = (x2node*)calloc(128, sizeof(x2node) + sizeof(x2node*)); if( x2a->tbl==0 ){ free(x2a); x2a = 0; @@ -3996,13 +5469,11 @@ void Symbol_init(){ } /* Insert a new record into the array. Return TRUE if successful. ** Prior data with the same key is NOT overwritten */ -int Symbol_insert(data,key) -struct symbol *data; -char *key; +int Symbol_insert(struct symbol *data, const char *key) { x2node *np; - int h; - int ph; + unsigned h; + unsigned ph; if( x2a==0 ) return 0; ph = strhash(key); @@ -4018,19 +5489,18 @@ char *key; } if( x2a->count>=x2a->size ){ /* Need to make the hash table bigger */ - int i,size; + int i,arrSize; struct s_x2 array; - array.size = size = x2a->size*2; + array.size = arrSize = x2a->size*2; array.count = x2a->count; - array.tbl = (x2node*)malloc( - (sizeof(x2node) + sizeof(x2node*))*size ); + array.tbl = (x2node*)calloc(arrSize, sizeof(x2node) + sizeof(x2node*)); if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ - array.ht = (x2node**)&(array.tbl[size]); - for(i=0; i<size; i++) array.ht[i] = 0; + array.ht = (x2node**)&(array.tbl[arrSize]); + for(i=0; i<arrSize; i++) array.ht[i] = 0; for(i=0; i<x2a->count; i++){ x2node *oldnp, *newnp; oldnp = &(x2a->tbl[i]); - h = strhash(oldnp->key) & (size-1); + h = strhash(oldnp->key) & (arrSize-1); newnp = &(array.tbl[i]); if( array.ht[h] ) array.ht[h]->from = &(newnp->next); newnp->next = array.ht[h]; @@ -4039,9 +5509,10 @@ char *key; newnp->from = &(array.ht[h]); array.ht[h] = newnp; } - free(x2a->tbl); - /* *x2a = array; *//* copy 'array' */ - memcpy(x2a, &array, sizeof(array)); + /* free(x2a->tbl); // This program was originally written for 16-bit + ** machines. Don't worry about freeing this trivial amount of memory + ** on modern platforms. Just leak it. */ + *x2a = array; } /* Insert the new data */ h = ph & (x2a->size-1); @@ -4057,10 +5528,9 @@ char *key; /* Return a pointer to data assigned to the given key. Return NULL ** if no such key. */ -struct symbol *Symbol_find(key) -char *key; +struct symbol *Symbol_find(const char *key) { - int h; + unsigned h; x2node *np; if( x2a==0 ) return 0; @@ -4074,8 +5544,7 @@ char *key; } /* Return the n-th data. Return NULL if n is out of range. */ -struct symbol *Symbol_Nth(n) -int n; +struct symbol *Symbol_Nth(int n) { struct symbol *data; if( x2a && n>0 && n<=x2a->count ){ @@ -4098,21 +5567,21 @@ int Symbol_count() struct symbol **Symbol_arrayof() { struct symbol **array; - int i,size; + int i,arrSize; if( x2a==0 ) return 0; - size = x2a->count; - array = (struct symbol **)malloc( sizeof(struct symbol *)*size ); + arrSize = x2a->count; + array = (struct symbol **)calloc(arrSize, sizeof(struct symbol *)); if( array ){ - for(i=0; i<size; i++) array[i] = x2a->tbl[i].data; + for(i=0; i<arrSize; i++) array[i] = x2a->tbl[i].data; } return array; } /* Compare two configurations */ -int Configcmp(a,b) -struct config *a; -struct config *b; +int Configcmp(const char *_a,const char *_b) { + const struct config *a = (struct config *) _a; + const struct config *b = (struct config *) _b; int x; x = a->rp->index - b->rp->index; if( x==0 ) x = a->dot - b->dot; @@ -4120,9 +5589,7 @@ struct config *b; } /* Compare two states */ -PRIVATE int statecmp(a,b) -struct config *a; -struct config *b; +PRIVATE int statecmp(struct config *a, struct config *b) { int rc; for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){ @@ -4137,12 +5604,11 @@ struct config *b; } /* Hash a state */ -PRIVATE int statehash(a) -struct config *a; +PRIVATE unsigned statehash(struct config *a) { - unsigned int h=0; + unsigned h=0; while( a ){ - h = h*571u + (unsigned int)a->rp->index*37u + (unsigned int)a->dot; + h = h*571 + a->rp->index*37 + a->dot; a = a->bp; } return h; @@ -4151,10 +5617,10 @@ struct config *a; /* Allocate a new state structure */ struct state *State_new() { - struct state *new; - new = (struct state *)malloc( sizeof(struct state) ); - MemoryCheck(new); - return new; + struct state *newstate; + newstate = (struct state *)calloc(1, sizeof(struct state) ); + MemoryCheck(newstate); + return newstate; } /* There is one instance of the following structure for each @@ -4183,14 +5649,13 @@ typedef struct s_x3node { static struct s_x3 *x3a; /* Allocate a new associative array */ -void State_init(){ +void State_init(void){ if( x3a ) return; x3a = (struct s_x3*)malloc( sizeof(struct s_x3) ); if( x3a ){ x3a->size = 128; x3a->count = 0; - x3a->tbl = (x3node*)malloc( - (sizeof(x3node) + sizeof(x3node*))*128 ); + x3a->tbl = (x3node*)calloc(128, sizeof(x3node) + sizeof(x3node*)); if( x3a->tbl==0 ){ free(x3a); x3a = 0; @@ -4203,13 +5668,11 @@ void State_init(){ } /* Insert a new record into the array. Return TRUE if successful. ** Prior data with the same key is NOT overwritten */ -int State_insert(data,key) -struct state *data; -struct config *key; +int State_insert(struct state *data, struct config *key) { x3node *np; - int h; - int ph; + unsigned h; + unsigned ph; if( x3a==0 ) return 0; ph = statehash(key); @@ -4225,19 +5688,18 @@ struct config *key; } if( x3a->count>=x3a->size ){ /* Need to make the hash table bigger */ - int i,size; + int i,arrSize; struct s_x3 array; - array.size = size = x3a->size*2; + array.size = arrSize = x3a->size*2; array.count = x3a->count; - array.tbl = (x3node*)malloc( - (sizeof(x3node) + sizeof(x3node*))*size ); + array.tbl = (x3node*)calloc(arrSize, sizeof(x3node) + sizeof(x3node*)); if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ - array.ht = (x3node**)&(array.tbl[size]); - for(i=0; i<size; i++) array.ht[i] = 0; + array.ht = (x3node**)&(array.tbl[arrSize]); + for(i=0; i<arrSize; i++) array.ht[i] = 0; for(i=0; i<x3a->count; i++){ x3node *oldnp, *newnp; oldnp = &(x3a->tbl[i]); - h = statehash(oldnp->key) & (size-1); + h = statehash(oldnp->key) & (arrSize-1); newnp = &(array.tbl[i]); if( array.ht[h] ) array.ht[h]->from = &(newnp->next); newnp->next = array.ht[h]; @@ -4247,8 +5709,7 @@ struct config *key; array.ht[h] = newnp; } free(x3a->tbl); - /* *x3a = array; *//* copy 'array' */ - memcpy(x3a, &array, sizeof(array)); + *x3a = array; } /* Insert the new data */ h = ph & (x3a->size-1); @@ -4264,10 +5725,9 @@ struct config *key; /* Return a pointer to data assigned to the given key. Return NULL ** if no such key. */ -struct state *State_find(key) -struct config *key; +struct state *State_find(struct config *key) { - int h; + unsigned h; x3node *np; if( x3a==0 ) return 0; @@ -4280,33 +5740,26 @@ struct config *key; return np ? np->data : 0; } -/* Return the size of the array */ -int State_count(void) -{ - return x3a ? x3a->count : 0; -} - /* Return an array of pointers to all data in the table. ** The array is obtained from malloc. Return NULL if memory allocation ** problems, or if the array is empty. */ -struct state **State_arrayof() +struct state **State_arrayof(void) { struct state **array; - int i,size; + int i,arrSize; if( x3a==0 ) return 0; - size = x3a->count; - array = (struct state **)malloc( sizeof(struct state *)*size ); + arrSize = x3a->count; + array = (struct state **)calloc(arrSize, sizeof(struct state *)); if( array ){ - for(i=0; i<size; i++) array[i] = x3a->tbl[i].data; + for(i=0; i<arrSize; i++) array[i] = x3a->tbl[i].data; } return array; } /* Hash a configuration */ -PRIVATE int confighash(a) -struct config *a; +PRIVATE unsigned confighash(struct config *a) { - int h=0; + unsigned h=0; h = h*571 + a->rp->index*37 + a->dot; return h; } @@ -4336,14 +5789,13 @@ typedef struct s_x4node { static struct s_x4 *x4a; /* Allocate a new associative array */ -void Configtable_init(){ +void Configtable_init(void){ if( x4a ) return; x4a = (struct s_x4*)malloc( sizeof(struct s_x4) ); if( x4a ){ x4a->size = 64; x4a->count = 0; - x4a->tbl = (x4node*)malloc( - (sizeof(x4node) + sizeof(x4node*))*64 ); + x4a->tbl = (x4node*)calloc(64, sizeof(x4node) + sizeof(x4node*)); if( x4a->tbl==0 ){ free(x4a); x4a = 0; @@ -4356,19 +5808,18 @@ void Configtable_init(){ } /* Insert a new record into the array. Return TRUE if successful. ** Prior data with the same key is NOT overwritten */ -int Configtable_insert(data) -struct config *data; +int Configtable_insert(struct config *data) { x4node *np; - int h; - int ph; + unsigned h; + unsigned ph; if( x4a==0 ) return 0; ph = confighash(data); h = ph & (x4a->size-1); np = x4a->ht[h]; while( np ){ - if( Configcmp(np->data,data)==0 ){ + if( Configcmp((const char *) np->data,(const char *) data)==0 ){ /* An existing entry with the same key is found. */ /* Fail because overwrite is not allows. */ return 0; @@ -4377,19 +5828,18 @@ struct config *data; } if( x4a->count>=x4a->size ){ /* Need to make the hash table bigger */ - int i,size; + int i,arrSize; struct s_x4 array; - array.size = size = x4a->size*2; + array.size = arrSize = x4a->size*2; array.count = x4a->count; - array.tbl = (x4node*)malloc( - (sizeof(x4node) + sizeof(x4node*))*size ); + array.tbl = (x4node*)calloc(arrSize, sizeof(x4node) + sizeof(x4node*)); if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ - array.ht = (x4node**)&(array.tbl[size]); - for(i=0; i<size; i++) array.ht[i] = 0; + array.ht = (x4node**)&(array.tbl[arrSize]); + for(i=0; i<arrSize; i++) array.ht[i] = 0; for(i=0; i<x4a->count; i++){ x4node *oldnp, *newnp; oldnp = &(x4a->tbl[i]); - h = confighash(oldnp->data) & (size-1); + h = confighash(oldnp->data) & (arrSize-1); newnp = &(array.tbl[i]); if( array.ht[h] ) array.ht[h]->from = &(newnp->next); newnp->next = array.ht[h]; @@ -4397,9 +5847,10 @@ struct config *data; newnp->from = &(array.ht[h]); array.ht[h] = newnp; } - free(x4a->tbl); - /* *x4a = array; *//* copy 'array' */ - memcpy(x4a, &array, sizeof(array)); + /* free(x4a->tbl); // This code was originall written for 16-bit machines. + ** on modern machines, don't worry about freeing this trival amount of + ** memory. */ + *x4a = array; } /* Insert the new data */ h = ph & (x4a->size-1); @@ -4414,8 +5865,7 @@ struct config *data; /* Return a pointer to data assigned to the given key. Return NULL ** if no such key. */ -struct config *Configtable_find(key) -struct config *key; +struct config *Configtable_find(struct config *key) { int h; x4node *np; @@ -4424,7 +5874,7 @@ struct config *key; h = confighash(key) & (x4a->size-1); np = x4a->ht[h]; while( np ){ - if( Configcmp(np->data,key)==0 ) break; + if( Configcmp((const char *) np->data,(const char *) key)==0 ) break; np = np->next; } return np ? np->data : 0; @@ -4432,8 +5882,7 @@ struct config *key; /* Remove all data from the table. Pass each data to the function "f" ** as it is removed. ("f" may be null to avoid this step.) */ -void Configtable_clear(f) -int(*f)(/* struct config * */); +void Configtable_clear(int(*f)(struct config *)) { int i; if( x4a==0 || x4a->count==0 ) return; |