/*
 *  Copyright 2001-2006 Adrian Thurston
 *            2004 Erich Ocean
 *            2005 Alan West
 */

/*  This file is part of Ragel.
 *
 *  Ragel is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  Ragel is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with Ragel; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "cdcodegen.h"
#include "ragel.h"
#include "redfsm.h"
#include "gendata.h"

#include <sstream>
#include <string>
#include <assert.h>

using std::ostream;
using std::ostringstream;
using std::string;
using std::cerr;
using std::endl;
using std::istream;
using std::ifstream;
using std::ostream;
using std::ios;
using std::cin;
using std::cout;
using std::cerr;
using std::endl;

extern int numSplitPartitions;
extern bool noLineDirectives;

/* Write a "#line <line> "<fileName>"" directive followed by a newline to out.
 * Every backslash in fileName is doubled so the name is a valid string
 * literal. When noLineDirectives is set the directive is still written, but
 * wrapped in a C comment so the compiler ignores it. */
void cdLineDirective( ostream &out, const char *fileName, int line )
{
	if ( noLineDirectives )
		out << "/* ";

	/* Write the preprocessor line info for the input file. */
	out << "#line " << line  << " \"";
	for ( const char *pc = fileName; *pc != 0; pc++ ) {
		if ( *pc == '\\' )
			out << "\\\\";
		else
			out << *pc;
	}
	out << '"';

	if ( noLineDirectives )
		out << " */";

	out << '\n';
}

/* Write a line directive that refers to the output file itself. The file
 * name and current line are read from the stream buffer behind out, which is
 * treated as an output_filter; the cast is unchecked, so out must really be
 * backed by one. The directive names the line after the filter's current
 * line. */
void FsmCodeGen::genLineDirective( ostream &out )
{
	std::streambuf *sbuf = out.rdbuf();
	output_filter *filter = static_cast<output_filter*>(sbuf);
	cdLineDirective( out, filter->fileName, filter->line + 1 );
}

/* Init code gen with in parameters.
*/ FsmCodeGen::FsmCodeGen( ostream &out ) : CodeGenData(out) { } unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal ) { long long maxValLL = (long long) maxVal; HostType *arrayType = keyOps->typeSubsumes( maxValLL ); assert( arrayType != 0 ); return arrayType->size; } string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal ) { long long maxValLL = (long long) maxVal; HostType *arrayType = keyOps->typeSubsumes( maxValLL ); assert( arrayType != 0 ); string ret = arrayType->data1; if ( arrayType->data2 != 0 ) { ret += " "; ret += arrayType->data2; } return ret; } /* Write out the fsm name. */ string FsmCodeGen::FSM_NAME() { return fsmName; } /* Emit the offset of the start state as a decimal integer. */ string FsmCodeGen::START_STATE_ID() { ostringstream ret; ret << redFsm->startState->id; return ret.str(); }; /* Write out the array of actions. */ std::ostream &FsmCodeGen::ACTIONS_ARRAY() { out << "\t0, "; int totalActions = 1; for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { /* Write out the length, which will never be the last character. */ out << act->key.length() << ", "; /* Put in a line break every 8 */ if ( totalActions++ % 8 == 7 ) out << "\n\t"; for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) { out << item->value->actionId; if ( ! 
(act.last() && item.last()) ) out << ", "; /* Put in a line break every 8 */ if ( totalActions++ % 8 == 7 ) out << "\n\t"; } } out << "\n"; return out; } string FsmCodeGen::ACCESS() { ostringstream ret; if ( accessExpr != 0 ) INLINE_LIST( ret, accessExpr, 0, false, false ); return ret.str(); } string FsmCodeGen::P() { ostringstream ret; if ( pExpr == 0 ) ret << "p"; else { ret << "("; INLINE_LIST( ret, pExpr, 0, false, false ); ret << ")"; } return ret.str(); } string FsmCodeGen::PE() { ostringstream ret; if ( peExpr == 0 ) ret << "pe"; else { ret << "("; INLINE_LIST( ret, peExpr, 0, false, false ); ret << ")"; } return ret.str(); } string FsmCodeGen::vEOF() { ostringstream ret; if ( eofExpr == 0 ) ret << "eof"; else { ret << "("; INLINE_LIST( ret, eofExpr, 0, false, false ); ret << ")"; } return ret.str(); } string FsmCodeGen::vCS() { ostringstream ret; if ( csExpr == 0 ) ret << ACCESS() << "cs"; else { /* Emit the user supplied method of retrieving the key. */ ret << "("; INLINE_LIST( ret, csExpr, 0, false, false ); ret << ")"; } return ret.str(); } string FsmCodeGen::TOP() { ostringstream ret; if ( topExpr == 0 ) ret << ACCESS() + "top"; else { ret << "("; INLINE_LIST( ret, topExpr, 0, false, false ); ret << ")"; } return ret.str(); } string FsmCodeGen::STACK() { ostringstream ret; if ( stackExpr == 0 ) ret << ACCESS() + "stack"; else { ret << "("; INLINE_LIST( ret, stackExpr, 0, false, false ); ret << ")"; } return ret.str(); } string FsmCodeGen::ACT() { ostringstream ret; if ( actExpr == 0 ) ret << ACCESS() + "act"; else { ret << "("; INLINE_LIST( ret, actExpr, 0, false, false ); ret << ")"; } return ret.str(); } string FsmCodeGen::TOKSTART() { ostringstream ret; if ( tokstartExpr == 0 ) ret << ACCESS() + "ts"; else { ret << "("; INLINE_LIST( ret, tokstartExpr, 0, false, false ); ret << ")"; } return ret.str(); } string FsmCodeGen::TOKEND() { ostringstream ret; if ( tokendExpr == 0 ) ret << ACCESS() + "te"; else { ret << "("; INLINE_LIST( ret, tokendExpr, 0, 
false, false ); ret << ")"; } return ret.str(); } string FsmCodeGen::GET_WIDE_KEY() { if ( redFsm->anyConditions() ) return "_widec"; else return GET_KEY(); } string FsmCodeGen::GET_WIDE_KEY( RedStateAp *state ) { if ( state->stateCondList.length() > 0 ) return "_widec"; else return GET_KEY(); } string FsmCodeGen::GET_KEY() { ostringstream ret; if ( getKeyExpr != 0 ) { /* Emit the user supplied method of retrieving the key. */ ret << "("; INLINE_LIST( ret, getKeyExpr, 0, false, false ); ret << ")"; } else { /* Expression for retrieving the key, use simple dereference. */ ret << "(*" << P() << ")"; } return ret.str(); } /* Write out level number of tabs. Makes the nested binary search nice * looking. */ string FsmCodeGen::TABS( int level ) { string result; while ( level-- > 0 ) result += "\t"; return result; } /* Write out a key from the fsm code gen. Depends on wether or not the key is * signed. */ string FsmCodeGen::KEY( Key key ) { ostringstream ret; if ( keyOps->isSigned || !hostLang->explicitUnsigned ) ret << key.getVal(); else ret << (unsigned long) key.getVal() << 'u'; return ret.str(); } bool FsmCodeGen::isAlphTypeSigned() { return keyOps->isSigned; } bool FsmCodeGen::isWideAlphTypeSigned() { string ret; if ( redFsm->maxKey <= keyOps->maxKey ) return isAlphTypeSigned(); else { long long maxKeyVal = redFsm->maxKey.getLongLong(); HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal ); return wideType->isSigned; } } string FsmCodeGen::WIDE_KEY( RedStateAp *state, Key key ) { if ( state->stateCondList.length() > 0 ) { ostringstream ret; if ( isWideAlphTypeSigned() ) ret << key.getVal(); else ret << (unsigned long) key.getVal() << 'u'; return ret.str(); } else { return KEY( key ); } } void FsmCodeGen::EOF_CHECK( ostream &ret ) { ret << " if ( " << P() << " == " << PE() << " )\n" " goto _test_eof;\n"; testEofUsed = true; } void FsmCodeGen::EXEC( ostream &ret, GenInlineItem *item, int targState, int inFinish ) { /* The parser gives fexec two 
children. The double brackets are for D * code. If the inline list is a single word it will get interpreted as a * C-style cast by the D compiler. */ ret << "{" << P() << " = (("; INLINE_LIST( ret, item->children, targState, inFinish, false ); ret << "))-1;}"; } void FsmCodeGen::LM_SWITCH( ostream &ret, GenInlineItem *item, int targState, int inFinish, bool csForced ) { ret << " switch( " << ACT() << " ) {\n"; bool haveDefault = false; for ( GenInlineList::Iter lma = *item->children; lma.lte(); lma++ ) { /* Write the case label, the action and the case break. */ if ( lma->lmId < 0 ) { ret << " default:\n"; haveDefault = true; } else ret << " case " << lma->lmId << ":\n"; /* Write the block and close it off. */ ret << " {"; INLINE_LIST( ret, lma->children, targState, inFinish, csForced ); ret << "}\n"; ret << " break;\n"; } if ( (hostLang->lang == HostLang::D || hostLang->lang == HostLang::D2) && !haveDefault ) ret << " default: break;"; ret << " }\n" "\t"; } void FsmCodeGen::SET_ACT( ostream &ret, GenInlineItem *item ) { ret << ACT() << " = " << item->lmId << ";"; } void FsmCodeGen::SET_TOKEND( ostream &ret, GenInlineItem *item ) { /* The tokend action sets tokend. */ ret << TOKEND() << " = " << P(); if ( item->offset != 0 ) out << "+" << item->offset; out << ";"; } void FsmCodeGen::GET_TOKEND( ostream &ret, GenInlineItem *item ) { ret << TOKEND(); } void FsmCodeGen::INIT_TOKSTART( ostream &ret, GenInlineItem *item ) { ret << TOKSTART() << " = " << NULL_ITEM() << ";"; } void FsmCodeGen::INIT_ACT( ostream &ret, GenInlineItem *item ) { ret << ACT() << " = 0;"; } void FsmCodeGen::SET_TOKSTART( ostream &ret, GenInlineItem *item ) { ret << TOKSTART() << " = " << P() << ";"; } void FsmCodeGen::SUB_ACTION( ostream &ret, GenInlineItem *item, int targState, bool inFinish, bool csForced ) { if ( item->children->length() > 0 ) { /* Write the block and close it off. 
*/ ret << "{"; INLINE_LIST( ret, item->children, targState, inFinish, csForced ); ret << "}"; } } /* Write out an inline tree structure. Walks the list and possibly calls out * to virtual functions than handle language specific items in the tree. */ void FsmCodeGen::INLINE_LIST( ostream &ret, GenInlineList *inlineList, int targState, bool inFinish, bool csForced ) { for ( GenInlineList::Iter item = *inlineList; item.lte(); item++ ) { switch ( item->type ) { case GenInlineItem::Text: ret << item->data; break; case GenInlineItem::Goto: GOTO( ret, item->targState->id, inFinish ); break; case GenInlineItem::Call: CALL( ret, item->targState->id, targState, inFinish ); break; case GenInlineItem::Next: NEXT( ret, item->targState->id, inFinish ); break; case GenInlineItem::Ret: RET( ret, inFinish ); break; case GenInlineItem::PChar: ret << P(); break; case GenInlineItem::Char: ret << GET_KEY(); break; case GenInlineItem::Hold: ret << P() << "--;"; break; case GenInlineItem::Exec: EXEC( ret, item, targState, inFinish ); break; case GenInlineItem::Curs: CURS( ret, inFinish ); break; case GenInlineItem::Targs: TARGS( ret, inFinish, targState ); break; case GenInlineItem::Entry: ret << item->targState->id; break; case GenInlineItem::GotoExpr: GOTO_EXPR( ret, item, inFinish ); break; case GenInlineItem::CallExpr: CALL_EXPR( ret, item, targState, inFinish ); break; case GenInlineItem::NextExpr: NEXT_EXPR( ret, item, inFinish ); break; case GenInlineItem::LmSwitch: LM_SWITCH( ret, item, targState, inFinish, csForced ); break; case GenInlineItem::LmSetActId: SET_ACT( ret, item ); break; case GenInlineItem::LmSetTokEnd: SET_TOKEND( ret, item ); break; case GenInlineItem::LmGetTokEnd: GET_TOKEND( ret, item ); break; case GenInlineItem::LmInitTokStart: INIT_TOKSTART( ret, item ); break; case GenInlineItem::LmInitAct: INIT_ACT( ret, item ); break; case GenInlineItem::LmSetTokStart: SET_TOKSTART( ret, item ); break; case GenInlineItem::SubAction: SUB_ACTION( ret, item, targState, 
inFinish, csForced ); break; case GenInlineItem::Break: BREAK( ret, targState, csForced ); break; } } } /* Write out paths in line directives. Escapes any special characters. */ string FsmCodeGen::LDIR_PATH( char *path ) { ostringstream ret; for ( char *pc = path; *pc != 0; pc++ ) { if ( *pc == '\\' ) ret << "\\\\"; else ret << *pc; } return ret.str(); } void FsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState, bool inFinish, bool csForced ) { /* Write the preprocessor line info for going into the source file. */ cdLineDirective( ret, action->loc.fileName, action->loc.line ); /* Write the block and close it off. */ ret << "\t{"; INLINE_LIST( ret, action->inlineList, targState, inFinish, csForced ); ret << "}\n"; } void FsmCodeGen::CONDITION( ostream &ret, GenAction *condition ) { ret << "\n"; cdLineDirective( ret, condition->loc.fileName, condition->loc.line ); INLINE_LIST( ret, condition->inlineList, 0, false, false ); } string FsmCodeGen::ERROR_STATE() { ostringstream ret; if ( redFsm->errState != 0 ) ret << redFsm->errState->id; else ret << "-1"; return ret.str(); } string FsmCodeGen::FIRST_FINAL_STATE() { ostringstream ret; if ( redFsm->firstFinState != 0 ) ret << redFsm->firstFinState->id; else ret << redFsm->nextStateId; return ret.str(); } void FsmCodeGen::writeInit() { out << " {\n"; if ( !noCS ) out << "\t" << vCS() << " = " << START() << ";\n"; /* If there are any calls, then the stack top needs initialization. */ if ( redFsm->anyActionCalls() || redFsm->anyActionRets() ) out << "\t" << TOP() << " = 0;\n"; if ( hasLongestMatch ) { out << " " << TOKSTART() << " = " << NULL_ITEM() << ";\n" " " << TOKEND() << " = " << NULL_ITEM() << ";\n" " " << ACT() << " = 0;\n"; } out << " }\n"; } string FsmCodeGen::DATA_PREFIX() { if ( !noPrefix ) return FSM_NAME() + "_"; return ""; } /* Emit the alphabet data type. 
*/ string FsmCodeGen::ALPH_TYPE() { string ret = keyOps->alphType->data1; if ( keyOps->alphType->data2 != 0 ) { ret += " "; ret += + keyOps->alphType->data2; } return ret; } /* Emit the alphabet data type. */ string FsmCodeGen::WIDE_ALPH_TYPE() { string ret; if ( redFsm->maxKey <= keyOps->maxKey ) ret = ALPH_TYPE(); else { long long maxKeyVal = redFsm->maxKey.getLongLong(); HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal ); assert( wideType != 0 ); ret = wideType->data1; if ( wideType->data2 != 0 ) { ret += " "; ret += wideType->data2; } } return ret; } void FsmCodeGen::STATE_IDS() { if ( redFsm->startState != 0 ) STATIC_VAR( "int", START() ) << " = " << START_STATE_ID() << ";\n"; if ( !noFinal ) STATIC_VAR( "int" , FIRST_FINAL() ) << " = " << FIRST_FINAL_STATE() << ";\n"; if ( !noError ) STATIC_VAR( "int", ERROR() ) << " = " << ERROR_STATE() << ";\n"; out << "\n"; if ( !noEntry && entryPointNames.length() > 0 ) { for ( EntryNameVect::Iter en = entryPointNames; en.lte(); en++ ) { STATIC_VAR( "int", DATA_PREFIX() + "en_" + *en ) << " = " << entryPointIds[en.pos()] << ";\n"; } out << "\n"; } } void FsmCodeGen::writeStart() { out << START_STATE_ID(); } void FsmCodeGen::writeFirstFinal() { out << FIRST_FINAL_STATE(); } void FsmCodeGen::writeError() { out << ERROR_STATE(); } /* * Language specific, but style independent code generators functions. 
*/ string CCodeGen::PTR_CONST() { return "const "; } string CCodeGen::PTR_CONST_END() { return ""; } std::ostream &CCodeGen::OPEN_ARRAY( string type, string name ) { out << "static const " << type << " " << name << "[] = {\n"; return out; } std::ostream &CCodeGen::CLOSE_ARRAY() { return out << "};\n"; } std::ostream &CCodeGen::STATIC_VAR( string type, string name ) { out << "static const " << type << " " << name; return out; } string CCodeGen::UINT( ) { return "unsigned int"; } string CCodeGen::ARR_OFF( string ptr, string offset ) { return ptr + " + " + offset; } string CCodeGen::CAST( string type ) { return "(" + type + ")"; } string CCodeGen::NULL_ITEM() { return "0"; } string CCodeGen::POINTER() { return " *"; } std::ostream &CCodeGen::SWITCH_DEFAULT() { return out; } string CCodeGen::CTRL_FLOW() { return ""; } void CCodeGen::writeExports() { if ( exportList.length() > 0 ) { for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) { out << "#define " << DATA_PREFIX() << "ex_" << ex->name << " " << KEY(ex->key) << "\n"; } out << "\n"; } } /* * D Specific */ string DCodeGen::NULL_ITEM() { return "null"; } string DCodeGen::POINTER() { // multiple items seperated by commas can also be pointer types. 
return "* "; } string DCodeGen::PTR_CONST() { return ""; } string DCodeGen::PTR_CONST_END() { return ""; } std::ostream &DCodeGen::OPEN_ARRAY( string type, string name ) { out << "static const " << type << "[] " << name << " = [\n"; return out; } std::ostream &DCodeGen::CLOSE_ARRAY() { return out << "];\n"; } std::ostream &DCodeGen::STATIC_VAR( string type, string name ) { out << "static const " << type << " " << name; return out; } string DCodeGen::ARR_OFF( string ptr, string offset ) { return "&" + ptr + "[" + offset + "]"; } string DCodeGen::CAST( string type ) { return "cast(" + type + ")"; } string DCodeGen::UINT( ) { return "uint"; } std::ostream &DCodeGen::SWITCH_DEFAULT() { out << " default: break;\n"; return out; } string DCodeGen::CTRL_FLOW() { return "if (true) "; } void DCodeGen::writeExports() { if ( exportList.length() > 0 ) { for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) { out << "static const " << ALPH_TYPE() << " " << DATA_PREFIX() << "ex_" << ex->name << " = " << KEY(ex->key) << ";\n"; } out << "\n"; } } /* * End D-specific code. */ /* * D2 Specific */ string D2CodeGen::NULL_ITEM() { return "null"; } string D2CodeGen::POINTER() { // multiple items seperated by commas can also be pointer types. 
return "* "; } string D2CodeGen::PTR_CONST() { return "const("; } string D2CodeGen::PTR_CONST_END() { return ")"; } std::ostream &D2CodeGen::OPEN_ARRAY( string type, string name ) { out << "enum " << type << "[] " << name << " = [\n"; return out; } std::ostream &D2CodeGen::CLOSE_ARRAY() { return out << "];\n"; } std::ostream &D2CodeGen::STATIC_VAR( string type, string name ) { out << "enum " << type << " " << name; return out; } string D2CodeGen::ARR_OFF( string ptr, string offset ) { return "&" + ptr + "[" + offset + "]"; } string D2CodeGen::CAST( string type ) { return "cast(" + type + ")"; } string D2CodeGen::UINT( ) { return "uint"; } std::ostream &D2CodeGen::SWITCH_DEFAULT() { out << " default: break;\n"; return out; } string D2CodeGen::CTRL_FLOW() { return "if (true) "; } void D2CodeGen::writeExports() { if ( exportList.length() > 0 ) { for ( ExportList::Iter ex = exportList; ex.lte(); ex++ ) { out << "enum " << ALPH_TYPE() << " " << DATA_PREFIX() << "ex_" << ex->name << " = " << KEY(ex->key) << ";\n"; } out << "\n"; } } void D2CodeGen::SUB_ACTION( ostream &ret, GenInlineItem *item, int targState, bool inFinish, bool csForced ) { if ( item->children->length() > 0 ) { /* Write the block and close it off. */ ret << "{{"; INLINE_LIST( ret, item->children, targState, inFinish, csForced ); ret << "}}"; } } void D2CodeGen::ACTION( ostream &ret, GenAction *action, int targState, bool inFinish, bool csForced ) { /* Write the preprocessor line info for going into the source file. */ cdLineDirective( ret, action->loc.fileName, action->loc.line ); /* Write the block and close it off. */ ret << "\t{{"; INLINE_LIST( ret, action->inlineList, targState, inFinish, csForced ); ret << "}}\n"; } /* * End D2-specific code. */ void FsmCodeGen::finishRagelDef() { if ( codeStyle == GenGoto || codeStyle == GenFGoto || codeStyle == GenIpGoto || codeStyle == GenSplit ) { /* For directly executable machines there is no required state * ordering. 
Choose a depth-first ordering to increase the * potential for fall-throughs. */ redFsm->depthFirstOrdering(); } else { /* The frontend will do this for us, but it may be a good idea to * force it if the intermediate file is edited. */ redFsm->sortByStateId(); } /* Choose default transitions and the single transition. */ redFsm->chooseDefaultSpan(); /* Maybe do flat expand, otherwise choose single. */ if ( codeStyle == GenFlat || codeStyle == GenFFlat ) redFsm->makeFlat(); else redFsm->chooseSingle(); /* If any errors have occured in the input file then don't write anything. */ if ( gblErrorCount > 0 ) return; if ( codeStyle == GenSplit ) redFsm->partitionFsm( numSplitPartitions ); if ( codeStyle == GenIpGoto || codeStyle == GenSplit ) redFsm->setInTrans(); /* Anlayze Machine will find the final action reference counts, among * other things. We will use these in reporting the usage * of fsm directives in action code. */ analyzeMachine(); /* Determine if we should use indicies. */ calcIndexSize(); } ostream &FsmCodeGen::source_warning( const InputLoc &loc ) { cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": warning: "; return cerr; } ostream &FsmCodeGen::source_error( const InputLoc &loc ) { gblErrorCount += 1; assert( sourceFileName != 0 ); cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": "; return cerr; }