diff options
Diffstat (limited to 'libfsm/inputdata.cc')
-rw-r--r-- | libfsm/inputdata.cc | 1150 |
1 files changed, 1150 insertions, 0 deletions
diff --git a/libfsm/inputdata.cc b/libfsm/inputdata.cc new file mode 100644 index 00000000..66ec4afb --- /dev/null +++ b/libfsm/inputdata.cc @@ -0,0 +1,1150 @@ +/* + * Copyright 2008-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ragel.h" +#include "common.h" +#include "inputdata.h" +#include "parsedata.h" +#include "load.h" +#include "rlscan.h" +#include "reducer.h" +#include "version.h" +#include "pcheck.h" +#include <colm/colm.h> + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <iostream> +#include <iomanip> +#include <fstream> +#include <unistd.h> +#include <sstream> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#if defined(HAVE_SYS_WAIT_H) +#include <sys/wait.h> +#endif + +#ifdef _WIN32 +#include <windows.h> +#include <psapi.h> +#include <time.h> +#include <io.h> +#include <process.h> + +#if _MSC_VER +#define S_IRUSR _S_IREAD +#define S_IWUSR _S_IWRITE +#endif +#endif + +using std::istream; +using std::ifstream; +using std::ofstream; +using std::stringstream; +using std::ostream; +using std::endl; +using std::ios; + +InputData::~InputData() +{ + inputItems.empty(); + parseDataList.empty(); + sectionList.empty(); + + for ( Vector<const char**>::Iter fns = streamFileNames; fns.lte(); fns++ ) { + const char **ptr = *fns; + while ( *ptr != 0 ) { + ::free( (void*)*ptr ); + ptr += 1; + } + free( (void*) *fns ); + } + + if ( outputFileName != 0 ) + delete[] outputFileName; + + if ( histogramFn != 0 ) + ::free( (void*)histogramFn ); + + if ( histogram != 0 ) + delete[] histogram; + + for ( ArgsVector::Iter bl = breadthLabels; bl.lte(); bl++ ) + free( (void*) *bl ); +} + +void InputData::makeDefaultFileName() +{ + if ( outputFileName == 0 ) + outputFileName = (hostLang->defaultOutFn)( inputFileName ); +} + +bool InputData::isBreadthLabel( const string &label ) +{ + for ( ArgsVector::Iter bl = breadthLabels; bl.lte(); bl++ ) { + if ( label == *bl ) + return true; + } + return false; +} + +void InputData::createOutputStream() +{ + /* Make sure we are not writing to the same file as the input file. */ + if ( outputFileName != 0 ) { + if ( strcmp( inputFileName, outputFileName ) == 0 ) { + error() << "output file \"" << outputFileName << + "\" is the same as the input file" << endl; + } + + /* Create the filter on the output and open it. */ + outFilter = new output_filter( outputFileName ); + + /* Open the output stream, attaching it to the filter. */ + outStream = new ostream( outFilter ); + } + else { + /* Writing out to std out. */ + outStream = &std::cout; + } +} + +void InputData::openOutput() +{ + if ( outFilter != 0 ) { + outFilter->open( outputFileName, ios::out|ios::trunc ); + if ( !outFilter->is_open() ) { + error() << "error opening " << outputFileName << " for writing" << endl; + abortCompile( 1 ); + } + } +} + +void InputData::prepareSingleMachine() +{ + ParseData *pd = 0; + GraphDictEl *gdEl = 0; + + /* Locate a machine spec to generate dot output for. We can only emit. + * Dot takes one graph at a time. */ + if ( machineSpec != 0 ) { + /* Machine specified. */ + ParseDataDictEl *pdEl = parseDataDict.find( machineSpec ); + if ( pdEl == 0 ) + error() << "could not locate machine specified with -S and/or -M" << endp; + pd = pdEl->value; + } + else { + /* No machine spec given, generate the first one. */ + if ( parseDataList.length() == 0 ) + error() << "no machine specification to generate graphviz output" << endp; + + pd = parseDataList.head; + } + + if ( machineName != 0 ) { + gdEl = pd->graphDict.find( machineName ); + if ( gdEl == 0 ) + error() << "machine definition/instantiation not found" << endp; + } + else { + /* We are using the whole machine spec. Need to make sure there + * are instances in the spec. */ + if ( pd->instanceList.length() == 0 ) + error() << "no machine instantiations to generate graphviz output" << endp; + } + + pd->prepareMachineGen( gdEl, hostLang ); + dotGenPd = pd; +} + +void InputData::prepareAllMachines() +{ + for ( ParseDataDict::Iter pdel = parseDataDict; pdel.lte(); pdel++ ) { + ParseData *pd = pdel->value; + if ( pd->instanceList.length() > 0 ) { + pd->prepareMachineGen( 0, hostLang ); + + pd->makeExports(); + } + + } +} + +void InputData::generateReduced() +{ + for ( ParseDataDict::Iter pdel = parseDataDict; pdel.lte(); pdel++ ) { + ParseData *pd = pdel->value; + if ( pd->instanceList.length() > 0 ) + pd->generateReduced( inputFileName, codeStyle, *outStream, hostLang ); + } +} + +void InputData::verifyWriteHasData( InputItem *ii ) +{ + if ( ii->type == InputItem::Write ) { + if ( ii->pd->cgd == 0 ) + error( ii->loc ) << ii->pd->sectionName << ": no machine instantiations to write" << endl; + } +} + +void InputData::verifyWritesHaveData() +{ + for ( InputItemList::Iter ii = inputItems; ii.lte(); ii++ ) + verifyWriteHasData( ii ); +} + +void InputData::writeOutput( InputItem *ii ) +{ + /* If it is the first input item then check if we need to write the BOM. */ + if ( ii->prev == 0 && utf8BomPresent ) + *outStream << (uchar)0xEF << (uchar)0xBB << (uchar) 0xBF; + + switch ( ii->type ) { + case InputItem::Write: { + CodeGenData *cgd = ii->pd->cgd; + cgd->writeStatement( ii->loc, ii->writeArgs.size(), + ii->writeArgs, generateDot, hostLang ); + break; + } + case InputItem::HostData: { + switch ( hostLang->backend ) { + case Direct: + if ( ii->loc.fileName != 0 ) { + if ( ii->prev != 0 ) + *outStream << "\n"; + (*hostLang->genLineDirective)( *outStream, !noLineDirectives, ii->loc.line, ii->loc.fileName ); + } + + *outStream << ii->data.str(); + break; + case Translated: + openHostBlock( '@', this, *outStream, inputFileName, ii->loc.line ); + translatedHostData( *outStream, ii->data.str() ); + *outStream << "}@"; + break; + } + break; + } + case InputItem::EndSection: { + break; + } + } +} + +void InputData::closeOutput() +{ + /* If writing to a file, delete the ostream, causing it to flush. + * Standard out is flushed automatically. */ + if ( outputFileName != 0 ) { + delete outStream; + delete outFilter; + } +} + +void InputData::processDot() +{ + /* Compiles the DOT machines. */ + prepareSingleMachine(); + + if ( errorCount > 0 ) + abortCompile( 1 ); + + createOutputStream(); + + if ( errorCount > 0 ) + abortCompile( 1 ); + + /* + * From this point on we should not be reporting any errors. + */ + + openOutput(); + writeDot( *outStream ); + closeOutput(); +} + +bool InputData::checkLastRef( InputItem *ii ) +{ + if ( generateDot ) + return true; + + if ( errorCount > 0 ) + return false; + + /* + * 1. Go forward to next last reference. + * 2. Fully process that machine, mark as processed. + * 3. Move forward through input items until no longer + */ + if ( ii->section != 0 && ii->section->lastReference == ii ) { + /* Fully Process. */ + ParseData *pd = ii->pd; + + if ( pd->instanceList.length() > 0 ) { +#ifdef WITH_RAGEL_KELBT + if ( ii->parser != 0 ) + ii->parser->terminateParser(); +#endif + + FsmRes res = pd->prepareMachineGen( 0, hostLang ); + + /* Compute exports from the export definitions. */ + pd->makeExports(); + + if ( !res.success() ) + return false; + + if ( errorCount > 0 ) + return false; + + pd->generateReduced( inputFileName, codeStyle, *outStream, hostLang ); + + if ( errorCount > 0 ) + return false; + } + + /* Mark all input items referencing the machine as processed. */ + InputItem *toMark = lastFlush; + while ( true ) { + toMark->processed = true; + + if ( toMark == ii ) + break; + + toMark = toMark->next; + } + + /* Move forward, flushing input items until we get to an unprocessed + * input item. */ + while ( lastFlush != 0 && lastFlush->processed ) { + verifyWriteHasData( lastFlush ); + + if ( errorCount > 0 ) + return false; + + /* Flush out. */ + writeOutput( lastFlush ); + + lastFlush = lastFlush->next; + } + } + return true; +} + +void InputData::makeFirstInputItem() +{ + /* Make the first input item. */ + InputItem *firstInputItem = new InputItem; + firstInputItem->type = InputItem::HostData; + firstInputItem->loc.fileName = inputFileName; + firstInputItem->loc.line = 1; + firstInputItem->loc.col = 1; + inputItems.append( firstInputItem ); +} + +/* Send eof to all parsers. */ +void InputData::terminateAllParsers( ) +{ +#ifdef WITH_RAGEL_KELBT + for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ ) + pdel->value->terminateParser(); +#endif +} + +void InputData::flushRemaining() +{ + InputItem *item = inputItems.head; + + while ( item != 0 ) { + checkLastRef( item ); + item = item->next; + } + + /* Flush remaining items. */ + while ( lastFlush != 0 ) { + /* Flush out. */ + writeOutput( lastFlush ); + + lastFlush = lastFlush->next; + } +} + +void InputData::makeTranslateOutputFileName() +{ + origOutputFileName = outputFileName; + outputFileName = fileNameFromStem( outputFileName, ".ri" ); + genOutputFileName = outputFileName; +} + +#ifdef WITH_RAGEL_KELBT +void InputData::parseKelbt() +{ + /* + * Ragel Parser from ragel 6. + */ + ifstream *inFileStream; + istream *inStream; + + /* Open the input file for reading. */ + assert( inputFileName != 0 ); + inFileStream = new ifstream( inputFileName ); + if ( ! inFileStream->is_open() ) + error() << "could not open " << inputFileName << " for reading" << endp; + inStream = inFileStream; + + makeFirstInputItem(); + + Scanner scanner( this, inputFileName, *inStream, 0, 0, 0, false ); + + scanner.sectionPass = true; + scanner.do_scan(); + + inStream->clear(); + inStream->seekg( 0, std::ios::beg ); + curItem = inputItems.head; + lastFlush = inputItems.head; + + scanner.sectionPass = false; + scanner.do_scan(); + + /* Finished, final check for errors.. */ + if ( errorCount > 0 ) + abortCompile( 1 ); + + /* Bail on above error. */ + if ( errorCount > 0 ) + abortCompile( 1 ); + + delete inFileStream; +} + +void InputData::processKelbt() +{ + /* With the kelbt version we implement two parse passes. The first is used + * to identify the last time that any given machine is referenced by a + * ragel section. In the second pass we parse, compile, and emit as far + * forward as possible when we encounter the last reference to a machine. + * */ + + if ( generateDot ) { + parseKelbt(); + terminateAllParsers(); + processDot(); + } + else { + createOutputStream(); + openOutput(); + parseKelbt(); + flushRemaining(); + closeOutput(); + } + + assert( errorCount == 0 ); +} +#endif + +bool InputData::parseReduce() +{ + /* + * Colm-based reduction parser introduced in ragel 7. + */ + + TopLevel *topLevel = new TopLevel( frontendSections, this, hostLang, + minimizeLevel, minimizeOpt ); + + /* Check input file. File is actually opened by colm code. We don't + * need to perform the check if in libragel since it comes in via a + * string. */ + if ( input == 0 ) { + ifstream *inFile = new ifstream( inputFileName ); + if ( ! inFile->is_open() ) + error() << "could not open " << inputFileName << " for reading" << endp; + delete inFile; + } + + if ( errorCount ) + return false; + + makeFirstInputItem(); + + curItem = inputItems.head; + lastFlush = inputItems.head; + + + topLevel->reduceFile( "rlparse", inputFileName ); + + if ( errorCount ) + return false; + + bool success = topLevel->success; + + delete topLevel; + return success; +} + +bool InputData::processReduce() +{ + if ( generateDot ) { + parseReduce(); + processDot(); + return true; + } + else { + createOutputStream(); + openOutput(); + + bool success = parseReduce(); + if ( success ) + flushRemaining(); + + closeOutput(); + + if ( !success && outputFileName != 0 ) + unlink( outputFileName ); + + return success; + } +} + +bool InputData::process() +{ + switch ( frontend ) { + case KelbtBased: { +#ifdef WITH_RAGEL_KELBT + processKelbt(); +#endif + return true; + } + case ReduceBased: { + return processReduce(); + } + } + return false; +} + +/* Print a summary of the options. */ +void InputData::usage() +{ + info() << +"usage: ragel [options] file\n" +"general:\n" +" -h, -H, -?, --help Print this usage and exit\n" +" -v, --version Print version information and exit\n" +" -o <file> Write output to <file>\n" +" -s Print some statistics and compilation info to stderr\n" +" -d Do not remove duplicates from action lists\n" +" -I <dir> Add <dir> to the list of directories to search\n" +" for included an imported files\n" +" --rlhc Show the rlhc command used to compile\n" +" --save-temps Do not delete intermediate file during compilation\n" +" --no-intermediate Disable call to rlhc, leave behind intermediate\n" +"error reporting format:\n" +" --error-format=gnu file:line:column: message (default)\n" +" --error-format=msvc file(line,column): message\n" +"fsm minimization:\n" +" -n Do not perform minimization\n" +" -m Minimize at the end of the compilation\n" +" -l Minimize after most operations (default)\n" +" -e Minimize after every operation\n" +"visualization:\n" +" -V Generate a dot file for Graphviz\n" +" -p Display printable characters on labels\n" +" -S <spec> FSM specification to output (for graphviz output)\n" +" -M <machine> Machine definition/instantiation to output (for\n" +" graphviz output)\n" +"host language:\n" +" -C C, C++, Obj-C or Obj-C++ (default)\n" +" All code styles supported.\n" +" --asm --gas-x86-64-sys-v\n" +" GNU AS, x86_64, System V ABI.\n" +" Generated in a code style equivalent to -G2\n" +" -D D All code styles supported\n" +" -Z Go All code styles supported\n" +" -A C# -T0 -T1 -F0 -F1 -G0 -G1\n" +" -J Java -T0 -T1 -F0 -F1\n" +" -R Ruby -T0 -T1 -F0 -F1\n" +" -O OCaml -T0 -T1 -F0 -F1\n" +" -U Rust -T0 -T1 -F0 -F1\n" +" -Y Julia -T0 -T1 -F0 -F1\n" +" -K Crack -T0 -T1 -F0 -F1\n" +" -P JavaScript -T0 -T1 -F0 -F1\n" +"line directives:\n" +" -L Inhibit writing of #line directives\n" +"code style:\n" +" -T0 Binary search (default)\n" +" -T1 Binary search with expanded actions \n" +" -F0 Flat table\n" +" -F1 Flat table with expanded actions\n" +" -G0 Switch-driven\n" +" -G1 Switch-driven with expanded actions\n" +" -G2 Goto-driven with expanded actions\n" +"large machines:\n" +" --integral-tables Use integers for table data (default)\n" +" --string-tables Encode table data into strings for faster host lang\n" +" compilation\n" +"analysis:\n" +" --prior-interaction Search for condition-based general repetitions\n" +" that will not function properly due to state mod\n" +" overlap and must be NFA reps. \n" +" --conds-depth=D Search for high-cost conditions inside a prefix\n" +" of the machine (depth D from start state).\n" +" --state-limit=L Report fail if number of states exceeds this\n" +" during compilation.\n" +" --breadth-check=E1,E2,.. Report breadth cost of named entry points and\n" +" the start state.\n" +" --input-histogram=FN Input char histogram for breadth check. If\n" +" unspecified a flat histogram is used.\n" +"testing:\n" +" --kelbt-frontend Compile using original ragel + kelbt frontend\n" +" Requires ragel be built with ragel + kelbt support\n" +" --colm-frontend Compile using a colm-based recursive descent\n" +" frontend\n" +" --reduce-frontend Compile using a colm-based reducer (default)\n" +" --var-backend Use the variable-based backend even if the host lang\n" +" supports goto-based\n" +" --supported-host-langs Show supported host languages by command line arg\n" +" --supported-frontends Show supported frontends\n" +" --supported-backends Show supported backends\n" +" --force-libragel Cause mainline to behave like libragel\n" + ; + + abortCompile( 0 ); +} + +/* Print version information and exit. */ +void InputData::version() +{ + info() << "Ragel State Machine Compiler version " RAGEL_VERSION << " " RAGEL_PUBDATE << endl << + "Copyright (c) 2001-2019 by Adrian Thurston et al." << endl; + abortCompile( 0 ); +} + +void InputData::showFrontends() +{ + ostream &out = info(); + out << "--colm-frontend"; + out << " --reduce-frontend"; +#ifdef WITH_RAGEL_KELBT + out << " --kelbt-frontend"; +#endif + out << endl; + abortCompile( 0 ); +} + +void InputData::showBackends() +{ + info() << + "--direct-backend --colm-backend" << endl; + abortCompile( 0 ); +} + +InputLoc makeInputLoc( const char *fileName, int line, int col ) +{ + InputLoc loc( fileName, line, col ); + return loc; +} + +void escapeLineDirectivePath( std::ostream &out, char *path ) +{ + for ( char *pc = path; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + out << "\\\\"; + else + out << *pc; + } +} + +void InputData::parseArgs( int argc, const char **argv ) +{ + ParamCheck pc( "o:dnmleabjkS:M:I:vHh?-:sT:F:W:G:LpV", argc, argv ); + + /* Decide if we were invoked using a path variable, or with an explicit path. */ + const char *lastSlash = strrchr( argv[0], '/' ); + if ( lastSlash == 0 ) { + /* Defualt to the the binary install location. */ + dirName = BINDIR; + } + else { + /* Compute dirName from argv0. */ + dirName = string( argv[0], lastSlash - argv[0] ); + } + + /* FIXME: Need to check code styles VS langauge. */ + + while ( pc.check() ) { + switch ( pc.state ) { + case ParamCheck::match: + switch ( pc.parameter ) { + case 'V': + generateDot = true; + break; + + /* Output. */ + case 'o': + if ( *pc.paramArg == 0 ) + error() << "a zero length output file name was given" << endl; + else if ( outputFileName != 0 ) + error() << "more than one output file name was given" << endl; + else { + /* Ok, remember the output file name. */ + outputFileName = new char[strlen(pc.paramArg)+1]; + strcpy( (char*)outputFileName, pc.paramArg ); + } + break; + + /* Flag for turning off duplicate action removal. */ + case 'd': + wantDupsRemoved = false; + break; + + /* Minimization, mostly hidden options. */ + case 'n': + minimizeOpt = MinimizeNone; + break; + case 'm': + minimizeOpt = MinimizeEnd; + break; + case 'l': + minimizeOpt = MinimizeMostOps; + break; + case 'e': + minimizeOpt = MinimizeEveryOp; + break; + case 'a': + #ifdef TO_UPGRADE_CONDS + minimizeLevel = MinimizeApprox; + #else + error() << "minimize approx (-a) unsupported in this version" << endp; + #endif + break; + case 'b': + #ifdef TO_UPGRADE_CONDS + minimizeLevel = MinimizeStable; + #else + error() << "minimize stable (-b) unsupported in this version" << endp; + #endif + break; + case 'j': + minimizeLevel = MinimizePartition1; + break; + case 'k': + minimizeLevel = MinimizePartition2; + break; + + /* Machine spec. */ + case 'S': + if ( *pc.paramArg == 0 ) + error() << "please specify an argument to -S" << endl; + else if ( machineSpec != 0 ) + error() << "more than one -S argument was given" << endl; + else { + /* Ok, remember the path to the machine to generate. */ + machineSpec = pc.paramArg; + } + break; + + /* Machine path. */ + case 'M': + if ( *pc.paramArg == 0 ) + error() << "please specify an argument to -M" << endl; + else if ( machineName != 0 ) + error() << "more than one -M argument was given" << endl; + else { + /* Ok, remember the machine name to generate. */ + machineName = pc.paramArg; + } + break; + + case 'I': + if ( *pc.paramArg == 0 ) + error() << "please specify an argument to -I" << endl; + else { + includePaths.append( pc.paramArg ); + } + break; + + /* Version and help. */ + case 'v': + version(); + break; + case 'H': case 'h': case '?': + usage(); + break; + case 's': + printStatistics = true; + break; + case '-': { + char *arg = strdup( pc.paramArg ); + char *eq = strchr( arg, '=' ); + + if ( eq != 0 ) + *eq++ = 0; + + if ( strcmp( arg, "help" ) == 0 ) + usage(); + else if ( strcmp( arg, "version" ) == 0 ) + version(); + else if ( strcmp( arg, "error-format" ) == 0 ) { + if ( eq == 0 ) + error() << "expecting '=value' for error-format" << endl; + else if ( strcmp( eq, "gnu" ) == 0 ) + errorFormat = ErrorFormatGNU; + else if ( strcmp( eq, "msvc" ) == 0 ) + errorFormat = ErrorFormatMSVC; + else + error() << "invalid value for error-format" << endl; + } + else if ( strcmp( arg, "rlhc" ) == 0 ) + rlhc = true; + else if ( strcmp( arg, "no-intermediate" ) == 0 ) + noIntermediate = true; +#ifdef WITH_RAGEL_KELBT + else if ( strcmp( arg, "kelbt-frontend" ) == 0 ) { + frontend = KelbtBased; + frontendSpecified = true; + } +#else + else if ( strcmp( arg, "kelbt-frontend" ) == 0 ) { + error() << "--kelbt-frontend specified but, " + "ragel not built with ragel+kelbt support" << endp; + } +#endif + else if ( strcmp( arg, "reduce-frontend" ) == 0 ) { + frontend = ReduceBased; + frontendSpecified = true; + } + else if ( strcmp( arg, "string-tables" ) == 0 ) + stringTables = true; + else if ( strcmp( arg, "integral-tables" ) == 0 ) + stringTables = false; + else if ( strcmp( arg, "supported-frontends" ) == 0 ) + showFrontends(); + else if ( strcmp( arg, "supported-backends" ) == 0 ) + showBackends(); + else if ( strcmp( arg, "save-temps" ) == 0 ) + saveTemps = true; + + else if ( strcmp( arg, "prior-interaction" ) == 0 ) + checkPriorInteraction = true; + else if ( strcmp( arg, "conds-depth" ) == 0 ) + condsCheckDepth = strtol( eq, 0, 10 ); + else if ( strcmp( arg, "state-limit" ) == 0 ) + stateLimit = strtol( eq, 0, 10 ); + + else if ( strcmp( arg, "breadth-check" ) == 0 ) { + char *ptr = 0; + while ( true ) { + char *label = strtok_r( eq, ",", &ptr ); + eq = NULL; + if ( label == NULL ) + break; + breadthLabels.append( strdup( label ) ); + } + checkBreadth = true; + } + else if ( strcmp( arg, "input-histogram" ) == 0 ) + histogramFn = strdup(eq); + else if ( strcmp( arg, "var-backend" ) == 0 ) + forceVar = true; + else if ( strcmp( arg, "no-fork" ) == 0 ) + noFork = true; + else { + error() << "--" << pc.paramArg << + " is an invalid argument" << endl; + } + free( arg ); + break; + } + + /* Passthrough args. */ + case 'T': + if ( pc.paramArg[0] == '0' ) + codeStyle = GenBinaryLoop; + else if ( pc.paramArg[0] == '1' ) + codeStyle = GenBinaryExp; + else { + error() << "-T" << pc.paramArg[0] << + " is an invalid argument" << endl; + abortCompile( 1 ); + } + break; + case 'F': + if ( pc.paramArg[0] == '0' ) + codeStyle = GenFlatLoop; + else if ( pc.paramArg[0] == '1' ) + codeStyle = GenFlatExp; + else { + error() << "-F" << pc.paramArg[0] << + " is an invalid argument" << endl; + abortCompile( 1 ); + } + break; + case 'G': + if ( pc.paramArg[0] == '0' ) + codeStyle = GenGotoLoop; + else if ( pc.paramArg[0] == '1' ) + codeStyle = GenGotoExp; + else if ( pc.paramArg[0] == '2' ) + codeStyle = GenIpGoto; + else if ( pc.paramArg[0] == 'T' && pc.paramArg[1] == '2' ) { + codeStyle = GenIpGoto; + maxTransitions = 32; + } else { + error() << "-G" << pc.paramArg[0] << + " is an invalid argument" << endl; + abortCompile( 1 ); + } + break; + case 'W': + if ( pc.paramArg[0] == '0' ) + codeStyle = GenSwitchLoop; + else if ( pc.paramArg[0] == '1' ) + codeStyle = GenSwitchExp; + else { + error() << "-G" << pc.paramArg[0] << + " is an invalid argument" << endl; + abortCompile( 1 ); + } + break; + + case 'p': + displayPrintables = true; + break; + + case 'L': + noLineDirectives = true; + break; + } + break; + + case ParamCheck::invalid: + error() << "-" << pc.parameter << " is an invalid argument" << endl; + break; + + case ParamCheck::noparam: + /* It is interpreted as an input file. */ + if ( *pc.curArg == 0 ) + error() << "a zero length input file name was given" << endl; + else if ( inputFileName != 0 ) + error() << "more than one input file name was given" << endl; + else { + /* OK, Remember the filename. */ + inputFileName = pc.curArg; + } + break; + } + } +} + +void InputData::loadHistogram() +{ + const int alphsize = 256; + + /* Init a default. */ + histogram = new double[alphsize]; + ifstream h( histogramFn ); + if ( !h.is_open() ) + error() << "histogram read: failed to open file: " << histogramFn << endp; + + int i = 0; + double value; + while ( true ) { + if ( h >> value ) { + if ( i >= alphsize ) { + /* Too many items. */ + error() << "histogram read: too many histogram values," + " expecting " << alphsize << " (for char alphabet)" << endp; + } + histogram[i] = value; + i++; + } + else { + /* Read failure. */ + if ( h.eof() ) { + if ( i < alphsize ) { + error() << "histogram read: fell short of " << + alphsize << " items" << endp; + } + break; + } + else { + error() << "histogram read: error at item " << i << endp; + } + } + } +} + +void InputData::defaultHistogram() +{ + /* Flat histogram. */ + const int alphsize = 256; + histogram = new double[alphsize]; + for ( int i = 0; i < alphsize; i++ ) { + histogram[i] = 1.0 / (double)alphsize; + } +} + +void InputData::checkArgs() +{ + /* Require an input file. If we use standard in then we won't have a file + * name on which to base the output. */ + if ( inputFileName == 0 ) + error() << "no input file given" << endl; + + /* Bail on argument processing errors. */ + if ( errorCount > 0 ) + abortCompile( 1 ); + + /* Make sure we are not writing to the same file as the input file. */ + if ( inputFileName != 0 && outputFileName != 0 && + strcmp( inputFileName, outputFileName ) == 0 ) + { + error() << "output file \"" << outputFileName << + "\" is the same as the input file" << endp; + } + + if ( !frontendSpecified ) + frontend = ReduceBased; + + if ( checkBreadth ) { + if ( histogramFn != 0 ) + loadHistogram(); + else + defaultHistogram(); + } +} + +char *InputData::readInput( const char *inputFileName ) +{ + struct stat st; + int res = stat( inputFileName, &st ); + if ( res != 0 ) { + error() << inputFileName << ": stat failed: " << strerror(errno) << endl; + return 0; + } + + std::ifstream in( inputFileName ); + if ( !in.is_open() ) { + error() << inputFileName << ": could not open in force-libragel mode"; + return 0; + } + + char *input = new char[st.st_size+1]; + in.read( input, st.st_size ); + if ( in.gcount() != st.st_size ) { + error() << inputFileName << ": could not read in force-libragel mode"; + delete[] input; + return 0; + } + input[st.st_size] = 0; + + return input; +} + +int InputData::main( int argc, const char **argv ) +{ + int code = 0; + try { + parseArgs( argc, argv ); + checkArgs(); + if ( !generateDot ) + makeDefaultFileName(); + + if ( !process() ) + abortCompile( 1 ); + } + catch ( const AbortCompile &ac ) { + code = ac.code; + } + + return code; +} + +int InputData::runFrontend( int argc, const char **argv ) +{ + if ( !process() ) + return -1; + return 0; +} + +int InputData::runRlhc( int argc, const char **argv ) +{ + struct colm_program *prg; + int exit_status; + + prg = colm_new_program( rlhcSections ); + colm_set_debug( prg, 0 ); + colm_run_program( prg, argc, argv ); + exit_status = colm_delete_program( prg ); + return exit_status; +} + +/* Run a job (frontend or backend). If we want forks then we return the result + * via the process's exit code. otherwise it comes back on the stack. */ +int InputData::runJob( const char *what, IdProcess idProcess, int argc, const char **argv ) +{ +#if defined(HAVE_SYS_WAIT_H) + if ( !noFork ) { + pid_t pid = fork(); + + if ( pid == 0 ) { + int es = (this->*idProcess)( argc, argv ); + exit( es ); + } + + int status = 0; + waitpid( pid, &status, 0 ); + if ( WIFSIGNALED(status) ) { + error() << what << " stopped by signal: " << WTERMSIG(status) << std::endl; + return -1; + } + + return WEXITSTATUS( status ); + } +#endif + return (this->*idProcess)( argc, argv ); +} + +int InputData::rlhcMain( int argc, const char **argv ) +{ + parseArgs( argc, argv ); + checkArgs(); + makeDefaultFileName(); + makeTranslateOutputFileName(); + + int es = runJob( "frontend", &InputData::runFrontend, 0, 0 ); + + if ( es != 0 ) + return es; + + /* rlhc <input> <output> */ + const char *_argv[] = { "rlhc", + genOutputFileName.c_str(), + origOutputFileName.c_str(), 0 }; + + return runJob( "rlhc", &InputData::runRlhc, 3, _argv ); +} |