summaryrefslogtreecommitdiff
path: root/libfsm/inputdata.cc
diff options
context:
space:
mode:
Diffstat (limited to 'libfsm/inputdata.cc')
-rw-r--r--libfsm/inputdata.cc1150
1 files changed, 1150 insertions, 0 deletions
diff --git a/libfsm/inputdata.cc b/libfsm/inputdata.cc
new file mode 100644
index 00000000..66ec4afb
--- /dev/null
+++ b/libfsm/inputdata.cc
@@ -0,0 +1,1150 @@
+/*
+ * Copyright 2008-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ragel.h"
+#include "common.h"
+#include "inputdata.h"
+#include "parsedata.h"
+#include "load.h"
+#include "rlscan.h"
+#include "reducer.h"
+#include "version.h"
+#include "pcheck.h"
+#include <colm/colm.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <unistd.h>
+#include <sstream>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#if defined(HAVE_SYS_WAIT_H)
+#include <sys/wait.h>
+#endif
+
+#ifdef _WIN32
+#include <windows.h>
+#include <psapi.h>
+#include <time.h>
+#include <io.h>
+#include <process.h>
+
+#if _MSC_VER
+#define S_IRUSR _S_IREAD
+#define S_IWUSR _S_IWRITE
+#endif
+#endif
+
+using std::istream;
+using std::ifstream;
+using std::ofstream;
+using std::stringstream;
+using std::ostream;
+using std::endl;
+using std::ios;
+
+InputData::~InputData()
+{
+ inputItems.empty();
+ parseDataList.empty();
+ sectionList.empty();
+
+ for ( Vector<const char**>::Iter fns = streamFileNames; fns.lte(); fns++ ) {
+ const char **ptr = *fns;
+ while ( *ptr != 0 ) {
+ ::free( (void*)*ptr );
+ ptr += 1;
+ }
+ free( (void*) *fns );
+ }
+
+ if ( outputFileName != 0 )
+ delete[] outputFileName;
+
+ if ( histogramFn != 0 )
+ ::free( (void*)histogramFn );
+
+ if ( histogram != 0 )
+ delete[] histogram;
+
+ for ( ArgsVector::Iter bl = breadthLabels; bl.lte(); bl++ )
+ free( (void*) *bl );
+}
+
+void InputData::makeDefaultFileName()
+{
+ if ( outputFileName == 0 )
+ outputFileName = (hostLang->defaultOutFn)( inputFileName );
+}
+
+bool InputData::isBreadthLabel( const string &label )
+{
+ for ( ArgsVector::Iter bl = breadthLabels; bl.lte(); bl++ ) {
+ if ( label == *bl )
+ return true;
+ }
+ return false;
+}
+
+void InputData::createOutputStream()
+{
+ /* Make sure we are not writing to the same file as the input file. */
+ if ( outputFileName != 0 ) {
+ if ( strcmp( inputFileName, outputFileName ) == 0 ) {
+ error() << "output file \"" << outputFileName <<
+ "\" is the same as the input file" << endl;
+ }
+
+ /* Create the filter on the output and open it. */
+ outFilter = new output_filter( outputFileName );
+
+ /* Open the output stream, attaching it to the filter. */
+ outStream = new ostream( outFilter );
+ }
+ else {
+ /* Writing out to std out. */
+ outStream = &std::cout;
+ }
+}
+
+void InputData::openOutput()
+{
+ if ( outFilter != 0 ) {
+ outFilter->open( outputFileName, ios::out|ios::trunc );
+ if ( !outFilter->is_open() ) {
+ error() << "error opening " << outputFileName << " for writing" << endl;
+ abortCompile( 1 );
+ }
+ }
+}
+
+void InputData::prepareSingleMachine()
+{
+ ParseData *pd = 0;
+ GraphDictEl *gdEl = 0;
+
+ /* Locate a machine spec to generate dot output for. We can only emit.
+ * Dot takes one graph at a time. */
+ if ( machineSpec != 0 ) {
+ /* Machine specified. */
+ ParseDataDictEl *pdEl = parseDataDict.find( machineSpec );
+ if ( pdEl == 0 )
+ error() << "could not locate machine specified with -S and/or -M" << endp;
+ pd = pdEl->value;
+ }
+ else {
+ /* No machine spec given, generate the first one. */
+ if ( parseDataList.length() == 0 )
+ error() << "no machine specification to generate graphviz output" << endp;
+
+ pd = parseDataList.head;
+ }
+
+ if ( machineName != 0 ) {
+ gdEl = pd->graphDict.find( machineName );
+ if ( gdEl == 0 )
+ error() << "machine definition/instantiation not found" << endp;
+ }
+ else {
+ /* We are using the whole machine spec. Need to make sure there
+ * are instances in the spec. */
+ if ( pd->instanceList.length() == 0 )
+ error() << "no machine instantiations to generate graphviz output" << endp;
+ }
+
+ pd->prepareMachineGen( gdEl, hostLang );
+ dotGenPd = pd;
+}
+
+void InputData::prepareAllMachines()
+{
+ for ( ParseDataDict::Iter pdel = parseDataDict; pdel.lte(); pdel++ ) {
+ ParseData *pd = pdel->value;
+ if ( pd->instanceList.length() > 0 ) {
+ pd->prepareMachineGen( 0, hostLang );
+
+ pd->makeExports();
+ }
+
+ }
+}
+
+void InputData::generateReduced()
+{
+ for ( ParseDataDict::Iter pdel = parseDataDict; pdel.lte(); pdel++ ) {
+ ParseData *pd = pdel->value;
+ if ( pd->instanceList.length() > 0 )
+ pd->generateReduced( inputFileName, codeStyle, *outStream, hostLang );
+ }
+}
+
+void InputData::verifyWriteHasData( InputItem *ii )
+{
+ if ( ii->type == InputItem::Write ) {
+ if ( ii->pd->cgd == 0 )
+ error( ii->loc ) << ii->pd->sectionName << ": no machine instantiations to write" << endl;
+ }
+}
+
+void InputData::verifyWritesHaveData()
+{
+ for ( InputItemList::Iter ii = inputItems; ii.lte(); ii++ )
+ verifyWriteHasData( ii );
+}
+
+void InputData::writeOutput( InputItem *ii )
+{
+ /* If it is the first input item then check if we need to write the BOM. */
+ if ( ii->prev == 0 && utf8BomPresent )
+ *outStream << (uchar)0xEF << (uchar)0xBB << (uchar) 0xBF;
+
+ switch ( ii->type ) {
+ case InputItem::Write: {
+ CodeGenData *cgd = ii->pd->cgd;
+ cgd->writeStatement( ii->loc, ii->writeArgs.size(),
+ ii->writeArgs, generateDot, hostLang );
+ break;
+ }
+ case InputItem::HostData: {
+ switch ( hostLang->backend ) {
+ case Direct:
+ if ( ii->loc.fileName != 0 ) {
+ if ( ii->prev != 0 )
+ *outStream << "\n";
+ (*hostLang->genLineDirective)( *outStream, !noLineDirectives, ii->loc.line, ii->loc.fileName );
+ }
+
+ *outStream << ii->data.str();
+ break;
+ case Translated:
+ openHostBlock( '@', this, *outStream, inputFileName, ii->loc.line );
+ translatedHostData( *outStream, ii->data.str() );
+ *outStream << "}@";
+ break;
+ }
+ break;
+ }
+ case InputItem::EndSection: {
+ break;
+ }
+ }
+}
+
+void InputData::closeOutput()
+{
+ /* If writing to a file, delete the ostream, causing it to flush.
+ * Standard out is flushed automatically. */
+ if ( outputFileName != 0 ) {
+ delete outStream;
+ delete outFilter;
+ }
+}
+
+void InputData::processDot()
+{
+ /* Compiles the DOT machines. */
+ prepareSingleMachine();
+
+ if ( errorCount > 0 )
+ abortCompile( 1 );
+
+ createOutputStream();
+
+ if ( errorCount > 0 )
+ abortCompile( 1 );
+
+ /*
+ * From this point on we should not be reporting any errors.
+ */
+
+ openOutput();
+ writeDot( *outStream );
+ closeOutput();
+}
+
+bool InputData::checkLastRef( InputItem *ii )
+{
+ if ( generateDot )
+ return true;
+
+ if ( errorCount > 0 )
+ return false;
+
+ /*
+ * 1. Go forward to next last reference.
+ * 2. Fully process that machine, mark as processed.
+ * 3. Move forward through input items until no longer
+ */
+ if ( ii->section != 0 && ii->section->lastReference == ii ) {
+ /* Fully Process. */
+ ParseData *pd = ii->pd;
+
+ if ( pd->instanceList.length() > 0 ) {
+#ifdef WITH_RAGEL_KELBT
+ if ( ii->parser != 0 )
+ ii->parser->terminateParser();
+#endif
+
+ FsmRes res = pd->prepareMachineGen( 0, hostLang );
+
+ /* Compute exports from the export definitions. */
+ pd->makeExports();
+
+ if ( !res.success() )
+ return false;
+
+ if ( errorCount > 0 )
+ return false;
+
+ pd->generateReduced( inputFileName, codeStyle, *outStream, hostLang );
+
+ if ( errorCount > 0 )
+ return false;
+ }
+
+ /* Mark all input items referencing the machine as processed. */
+ InputItem *toMark = lastFlush;
+ while ( true ) {
+ toMark->processed = true;
+
+ if ( toMark == ii )
+ break;
+
+ toMark = toMark->next;
+ }
+
+ /* Move forward, flushing input items until we get to an unprocessed
+ * input item. */
+ while ( lastFlush != 0 && lastFlush->processed ) {
+ verifyWriteHasData( lastFlush );
+
+ if ( errorCount > 0 )
+ return false;
+
+ /* Flush out. */
+ writeOutput( lastFlush );
+
+ lastFlush = lastFlush->next;
+ }
+ }
+ return true;
+}
+
+void InputData::makeFirstInputItem()
+{
+ /* Make the first input item. */
+ InputItem *firstInputItem = new InputItem;
+ firstInputItem->type = InputItem::HostData;
+ firstInputItem->loc.fileName = inputFileName;
+ firstInputItem->loc.line = 1;
+ firstInputItem->loc.col = 1;
+ inputItems.append( firstInputItem );
+}
+
+/* Send eof to all parsers. */
+void InputData::terminateAllParsers( )
+{
+#ifdef WITH_RAGEL_KELBT
+ for ( ParserDict::Iter pdel = parserDict; pdel.lte(); pdel++ )
+ pdel->value->terminateParser();
+#endif
+}
+
+void InputData::flushRemaining()
+{
+ InputItem *item = inputItems.head;
+
+ while ( item != 0 ) {
+ checkLastRef( item );
+ item = item->next;
+ }
+
+ /* Flush remaining items. */
+ while ( lastFlush != 0 ) {
+ /* Flush out. */
+ writeOutput( lastFlush );
+
+ lastFlush = lastFlush->next;
+ }
+}
+
+void InputData::makeTranslateOutputFileName()
+{
+ origOutputFileName = outputFileName;
+ outputFileName = fileNameFromStem( outputFileName, ".ri" );
+ genOutputFileName = outputFileName;
+}
+
+#ifdef WITH_RAGEL_KELBT
+void InputData::parseKelbt()
+{
+ /*
+ * Ragel Parser from ragel 6.
+ */
+ ifstream *inFileStream;
+ istream *inStream;
+
+ /* Open the input file for reading. */
+ assert( inputFileName != 0 );
+ inFileStream = new ifstream( inputFileName );
+ if ( ! inFileStream->is_open() )
+ error() << "could not open " << inputFileName << " for reading" << endp;
+ inStream = inFileStream;
+
+ makeFirstInputItem();
+
+ Scanner scanner( this, inputFileName, *inStream, 0, 0, 0, false );
+
+ scanner.sectionPass = true;
+ scanner.do_scan();
+
+ inStream->clear();
+ inStream->seekg( 0, std::ios::beg );
+ curItem = inputItems.head;
+ lastFlush = inputItems.head;
+
+ scanner.sectionPass = false;
+ scanner.do_scan();
+
+ /* Finished, final check for errors.. */
+ if ( errorCount > 0 )
+ abortCompile( 1 );
+
+ /* Bail on above error. */
+ if ( errorCount > 0 )
+ abortCompile( 1 );
+
+ delete inFileStream;
+}
+
+void InputData::processKelbt()
+{
+ /* With the kelbt version we implement two parse passes. The first is used
+ * to identify the last time that any given machine is referenced by a
+ * ragel section. In the second pass we parse, compile, and emit as far
+ * forward as possible when we encounter the last reference to a machine.
+ * */
+
+ if ( generateDot ) {
+ parseKelbt();
+ terminateAllParsers();
+ processDot();
+ }
+ else {
+ createOutputStream();
+ openOutput();
+ parseKelbt();
+ flushRemaining();
+ closeOutput();
+ }
+
+ assert( errorCount == 0 );
+}
+#endif
+
+bool InputData::parseReduce()
+{
+ /*
+ * Colm-based reduction parser introduced in ragel 7.
+ */
+
+ TopLevel *topLevel = new TopLevel( frontendSections, this, hostLang,
+ minimizeLevel, minimizeOpt );
+
+ /* Check input file. File is actually opened by colm code. We don't
+ * need to perform the check if in libragel since it comes in via a
+ * string. */
+ if ( input == 0 ) {
+ ifstream *inFile = new ifstream( inputFileName );
+ if ( ! inFile->is_open() )
+ error() << "could not open " << inputFileName << " for reading" << endp;
+ delete inFile;
+ }
+
+ if ( errorCount )
+ return false;
+
+ makeFirstInputItem();
+
+ curItem = inputItems.head;
+ lastFlush = inputItems.head;
+
+
+ topLevel->reduceFile( "rlparse", inputFileName );
+
+ if ( errorCount )
+ return false;
+
+ bool success = topLevel->success;
+
+ delete topLevel;
+ return success;
+}
+
+bool InputData::processReduce()
+{
+ if ( generateDot ) {
+ parseReduce();
+ processDot();
+ return true;
+ }
+ else {
+ createOutputStream();
+ openOutput();
+
+ bool success = parseReduce();
+ if ( success )
+ flushRemaining();
+
+ closeOutput();
+
+ if ( !success && outputFileName != 0 )
+ unlink( outputFileName );
+
+ return success;
+ }
+}
+
+bool InputData::process()
+{
+ switch ( frontend ) {
+ case KelbtBased: {
+#ifdef WITH_RAGEL_KELBT
+ processKelbt();
+#endif
+ return true;
+ }
+ case ReduceBased: {
+ return processReduce();
+ }
+ }
+ return false;
+}
+
+/* Print a summary of the options. */
+void InputData::usage()
+{
+ info() <<
+"usage: ragel [options] file\n"
+"general:\n"
+" -h, -H, -?, --help Print this usage and exit\n"
+" -v, --version Print version information and exit\n"
+" -o <file> Write output to <file>\n"
+" -s Print some statistics and compilation info to stderr\n"
+" -d Do not remove duplicates from action lists\n"
+" -I <dir> Add <dir> to the list of directories to search\n"
+" for included an imported files\n"
+" --rlhc Show the rlhc command used to compile\n"
+" --save-temps Do not delete intermediate file during compilation\n"
+" --no-intermediate Disable call to rlhc, leave behind intermediate\n"
+"error reporting format:\n"
+" --error-format=gnu file:line:column: message (default)\n"
+" --error-format=msvc file(line,column): message\n"
+"fsm minimization:\n"
+" -n Do not perform minimization\n"
+" -m Minimize at the end of the compilation\n"
+" -l Minimize after most operations (default)\n"
+" -e Minimize after every operation\n"
+"visualization:\n"
+" -V Generate a dot file for Graphviz\n"
+" -p Display printable characters on labels\n"
+" -S <spec> FSM specification to output (for graphviz output)\n"
+" -M <machine> Machine definition/instantiation to output (for\n"
+" graphviz output)\n"
+"host language:\n"
+" -C C, C++, Obj-C or Obj-C++ (default)\n"
+" All code styles supported.\n"
+" --asm --gas-x86-64-sys-v\n"
+" GNU AS, x86_64, System V ABI.\n"
+" Generated in a code style equivalent to -G2\n"
+" -D D All code styles supported\n"
+" -Z Go All code styles supported\n"
+" -A C# -T0 -T1 -F0 -F1 -G0 -G1\n"
+" -J Java -T0 -T1 -F0 -F1\n"
+" -R Ruby -T0 -T1 -F0 -F1\n"
+" -O OCaml -T0 -T1 -F0 -F1\n"
+" -U Rust -T0 -T1 -F0 -F1\n"
+" -Y Julia -T0 -T1 -F0 -F1\n"
+" -K Crack -T0 -T1 -F0 -F1\n"
+" -P JavaScript -T0 -T1 -F0 -F1\n"
+"line directives:\n"
+" -L Inhibit writing of #line directives\n"
+"code style:\n"
+" -T0 Binary search (default)\n"
+" -T1 Binary search with expanded actions \n"
+" -F0 Flat table\n"
+" -F1 Flat table with expanded actions\n"
+" -G0 Switch-driven\n"
+" -G1 Switch-driven with expanded actions\n"
+" -G2 Goto-driven with expanded actions\n"
+"large machines:\n"
+" --integral-tables Use integers for table data (default)\n"
+" --string-tables Encode table data into strings for faster host lang\n"
+" compilation\n"
+"analysis:\n"
+" --prior-interaction Search for condition-based general repetitions\n"
+" that will not function properly due to state mod\n"
+" overlap and must be NFA reps. \n"
+" --conds-depth=D Search for high-cost conditions inside a prefix\n"
+" of the machine (depth D from start state).\n"
+" --state-limit=L Report fail if number of states exceeds this\n"
+" during compilation.\n"
+" --breadth-check=E1,E2,.. Report breadth cost of named entry points and\n"
+" the start state.\n"
+" --input-histogram=FN Input char histogram for breadth check. If\n"
+" unspecified a flat histogram is used.\n"
+"testing:\n"
+" --kelbt-frontend Compile using original ragel + kelbt frontend\n"
+" Requires ragel be built with ragel + kelbt support\n"
+" --colm-frontend Compile using a colm-based recursive descent\n"
+" frontend\n"
+" --reduce-frontend Compile using a colm-based reducer (default)\n"
+" --var-backend Use the variable-based backend even if the host lang\n"
+" supports goto-based\n"
+" --supported-host-langs Show supported host languages by command line arg\n"
+" --supported-frontends Show supported frontends\n"
+" --supported-backends Show supported backends\n"
+" --force-libragel Cause mainline to behave like libragel\n"
+ ;
+
+ abortCompile( 0 );
+}
+
+/* Print version information and exit. */
+void InputData::version()
+{
+ info() << "Ragel State Machine Compiler version " RAGEL_VERSION << " " RAGEL_PUBDATE << endl <<
+ "Copyright (c) 2001-2019 by Adrian Thurston et al." << endl;
+ abortCompile( 0 );
+}
+
+void InputData::showFrontends()
+{
+ ostream &out = info();
+ out << "--colm-frontend";
+ out << " --reduce-frontend";
+#ifdef WITH_RAGEL_KELBT
+ out << " --kelbt-frontend";
+#endif
+ out << endl;
+ abortCompile( 0 );
+}
+
+void InputData::showBackends()
+{
+ info() <<
+ "--direct-backend --colm-backend" << endl;
+ abortCompile( 0 );
+}
+
+InputLoc makeInputLoc( const char *fileName, int line, int col )
+{
+ InputLoc loc( fileName, line, col );
+ return loc;
+}
+
+void escapeLineDirectivePath( std::ostream &out, char *path )
+{
+ for ( char *pc = path; *pc != 0; pc++ ) {
+ if ( *pc == '\\' )
+ out << "\\\\";
+ else
+ out << *pc;
+ }
+}
+
+void InputData::parseArgs( int argc, const char **argv )
+{
+ ParamCheck pc( "o:dnmleabjkS:M:I:vHh?-:sT:F:W:G:LpV", argc, argv );
+
+ /* Decide if we were invoked using a path variable, or with an explicit path. */
+ const char *lastSlash = strrchr( argv[0], '/' );
+ if ( lastSlash == 0 ) {
+ /* Defualt to the the binary install location. */
+ dirName = BINDIR;
+ }
+ else {
+ /* Compute dirName from argv0. */
+ dirName = string( argv[0], lastSlash - argv[0] );
+ }
+
+ /* FIXME: Need to check code styles VS langauge. */
+
+ while ( pc.check() ) {
+ switch ( pc.state ) {
+ case ParamCheck::match:
+ switch ( pc.parameter ) {
+ case 'V':
+ generateDot = true;
+ break;
+
+ /* Output. */
+ case 'o':
+ if ( *pc.paramArg == 0 )
+ error() << "a zero length output file name was given" << endl;
+ else if ( outputFileName != 0 )
+ error() << "more than one output file name was given" << endl;
+ else {
+ /* Ok, remember the output file name. */
+ outputFileName = new char[strlen(pc.paramArg)+1];
+ strcpy( (char*)outputFileName, pc.paramArg );
+ }
+ break;
+
+ /* Flag for turning off duplicate action removal. */
+ case 'd':
+ wantDupsRemoved = false;
+ break;
+
+ /* Minimization, mostly hidden options. */
+ case 'n':
+ minimizeOpt = MinimizeNone;
+ break;
+ case 'm':
+ minimizeOpt = MinimizeEnd;
+ break;
+ case 'l':
+ minimizeOpt = MinimizeMostOps;
+ break;
+ case 'e':
+ minimizeOpt = MinimizeEveryOp;
+ break;
+ case 'a':
+ #ifdef TO_UPGRADE_CONDS
+ minimizeLevel = MinimizeApprox;
+ #else
+ error() << "minimize approx (-a) unsupported in this version" << endp;
+ #endif
+ break;
+ case 'b':
+ #ifdef TO_UPGRADE_CONDS
+ minimizeLevel = MinimizeStable;
+ #else
+ error() << "minimize stable (-b) unsupported in this version" << endp;
+ #endif
+ break;
+ case 'j':
+ minimizeLevel = MinimizePartition1;
+ break;
+ case 'k':
+ minimizeLevel = MinimizePartition2;
+ break;
+
+ /* Machine spec. */
+ case 'S':
+ if ( *pc.paramArg == 0 )
+ error() << "please specify an argument to -S" << endl;
+ else if ( machineSpec != 0 )
+ error() << "more than one -S argument was given" << endl;
+ else {
+ /* Ok, remember the path to the machine to generate. */
+ machineSpec = pc.paramArg;
+ }
+ break;
+
+ /* Machine path. */
+ case 'M':
+ if ( *pc.paramArg == 0 )
+ error() << "please specify an argument to -M" << endl;
+ else if ( machineName != 0 )
+ error() << "more than one -M argument was given" << endl;
+ else {
+ /* Ok, remember the machine name to generate. */
+ machineName = pc.paramArg;
+ }
+ break;
+
+ case 'I':
+ if ( *pc.paramArg == 0 )
+ error() << "please specify an argument to -I" << endl;
+ else {
+ includePaths.append( pc.paramArg );
+ }
+ break;
+
+ /* Version and help. */
+ case 'v':
+ version();
+ break;
+ case 'H': case 'h': case '?':
+ usage();
+ break;
+ case 's':
+ printStatistics = true;
+ break;
+ case '-': {
+ char *arg = strdup( pc.paramArg );
+ char *eq = strchr( arg, '=' );
+
+ if ( eq != 0 )
+ *eq++ = 0;
+
+ if ( strcmp( arg, "help" ) == 0 )
+ usage();
+ else if ( strcmp( arg, "version" ) == 0 )
+ version();
+ else if ( strcmp( arg, "error-format" ) == 0 ) {
+ if ( eq == 0 )
+ error() << "expecting '=value' for error-format" << endl;
+ else if ( strcmp( eq, "gnu" ) == 0 )
+ errorFormat = ErrorFormatGNU;
+ else if ( strcmp( eq, "msvc" ) == 0 )
+ errorFormat = ErrorFormatMSVC;
+ else
+ error() << "invalid value for error-format" << endl;
+ }
+ else if ( strcmp( arg, "rlhc" ) == 0 )
+ rlhc = true;
+ else if ( strcmp( arg, "no-intermediate" ) == 0 )
+ noIntermediate = true;
+#ifdef WITH_RAGEL_KELBT
+ else if ( strcmp( arg, "kelbt-frontend" ) == 0 ) {
+ frontend = KelbtBased;
+ frontendSpecified = true;
+ }
+#else
+ else if ( strcmp( arg, "kelbt-frontend" ) == 0 ) {
+ error() << "--kelbt-frontend specified but, "
+ "ragel not built with ragel+kelbt support" << endp;
+ }
+#endif
+ else if ( strcmp( arg, "reduce-frontend" ) == 0 ) {
+ frontend = ReduceBased;
+ frontendSpecified = true;
+ }
+ else if ( strcmp( arg, "string-tables" ) == 0 )
+ stringTables = true;
+ else if ( strcmp( arg, "integral-tables" ) == 0 )
+ stringTables = false;
+ else if ( strcmp( arg, "supported-frontends" ) == 0 )
+ showFrontends();
+ else if ( strcmp( arg, "supported-backends" ) == 0 )
+ showBackends();
+ else if ( strcmp( arg, "save-temps" ) == 0 )
+ saveTemps = true;
+
+ else if ( strcmp( arg, "prior-interaction" ) == 0 )
+ checkPriorInteraction = true;
+ else if ( strcmp( arg, "conds-depth" ) == 0 )
+ condsCheckDepth = strtol( eq, 0, 10 );
+ else if ( strcmp( arg, "state-limit" ) == 0 )
+ stateLimit = strtol( eq, 0, 10 );
+
+ else if ( strcmp( arg, "breadth-check" ) == 0 ) {
+ char *ptr = 0;
+ while ( true ) {
+ char *label = strtok_r( eq, ",", &ptr );
+ eq = NULL;
+ if ( label == NULL )
+ break;
+ breadthLabels.append( strdup( label ) );
+ }
+ checkBreadth = true;
+ }
+ else if ( strcmp( arg, "input-histogram" ) == 0 )
+ histogramFn = strdup(eq);
+ else if ( strcmp( arg, "var-backend" ) == 0 )
+ forceVar = true;
+ else if ( strcmp( arg, "no-fork" ) == 0 )
+ noFork = true;
+ else {
+ error() << "--" << pc.paramArg <<
+ " is an invalid argument" << endl;
+ }
+ free( arg );
+ break;
+ }
+
+ /* Passthrough args. */
+ case 'T':
+ if ( pc.paramArg[0] == '0' )
+ codeStyle = GenBinaryLoop;
+ else if ( pc.paramArg[0] == '1' )
+ codeStyle = GenBinaryExp;
+ else {
+ error() << "-T" << pc.paramArg[0] <<
+ " is an invalid argument" << endl;
+ abortCompile( 1 );
+ }
+ break;
+ case 'F':
+ if ( pc.paramArg[0] == '0' )
+ codeStyle = GenFlatLoop;
+ else if ( pc.paramArg[0] == '1' )
+ codeStyle = GenFlatExp;
+ else {
+ error() << "-F" << pc.paramArg[0] <<
+ " is an invalid argument" << endl;
+ abortCompile( 1 );
+ }
+ break;
+ case 'G':
+ if ( pc.paramArg[0] == '0' )
+ codeStyle = GenGotoLoop;
+ else if ( pc.paramArg[0] == '1' )
+ codeStyle = GenGotoExp;
+ else if ( pc.paramArg[0] == '2' )
+ codeStyle = GenIpGoto;
+ else if ( pc.paramArg[0] == 'T' && pc.paramArg[1] == '2' ) {
+ codeStyle = GenIpGoto;
+ maxTransitions = 32;
+ } else {
+ error() << "-G" << pc.paramArg[0] <<
+ " is an invalid argument" << endl;
+ abortCompile( 1 );
+ }
+ break;
+ case 'W':
+ if ( pc.paramArg[0] == '0' )
+ codeStyle = GenSwitchLoop;
+ else if ( pc.paramArg[0] == '1' )
+ codeStyle = GenSwitchExp;
+ else {
+ error() << "-G" << pc.paramArg[0] <<
+ " is an invalid argument" << endl;
+ abortCompile( 1 );
+ }
+ break;
+
+ case 'p':
+ displayPrintables = true;
+ break;
+
+ case 'L':
+ noLineDirectives = true;
+ break;
+ }
+ break;
+
+ case ParamCheck::invalid:
+ error() << "-" << pc.parameter << " is an invalid argument" << endl;
+ break;
+
+ case ParamCheck::noparam:
+ /* It is interpreted as an input file. */
+ if ( *pc.curArg == 0 )
+ error() << "a zero length input file name was given" << endl;
+ else if ( inputFileName != 0 )
+ error() << "more than one input file name was given" << endl;
+ else {
+ /* OK, Remember the filename. */
+ inputFileName = pc.curArg;
+ }
+ break;
+ }
+ }
+}
+
+void InputData::loadHistogram()
+{
+ const int alphsize = 256;
+
+ /* Init a default. */
+ histogram = new double[alphsize];
+ ifstream h( histogramFn );
+ if ( !h.is_open() )
+ error() << "histogram read: failed to open file: " << histogramFn << endp;
+
+ int i = 0;
+ double value;
+ while ( true ) {
+ if ( h >> value ) {
+ if ( i >= alphsize ) {
+ /* Too many items. */
+ error() << "histogram read: too many histogram values,"
+ " expecting " << alphsize << " (for char alphabet)" << endp;
+ }
+ histogram[i] = value;
+ i++;
+ }
+ else {
+ /* Read failure. */
+ if ( h.eof() ) {
+ if ( i < alphsize ) {
+ error() << "histogram read: fell short of " <<
+ alphsize << " items" << endp;
+ }
+ break;
+ }
+ else {
+ error() << "histogram read: error at item " << i << endp;
+ }
+ }
+ }
+}
+
+void InputData::defaultHistogram()
+{
+ /* Flat histogram. */
+ const int alphsize = 256;
+ histogram = new double[alphsize];
+ for ( int i = 0; i < alphsize; i++ ) {
+ histogram[i] = 1.0 / (double)alphsize;
+ }
+}
+
+void InputData::checkArgs()
+{
+ /* Require an input file. If we use standard in then we won't have a file
+ * name on which to base the output. */
+ if ( inputFileName == 0 )
+ error() << "no input file given" << endl;
+
+ /* Bail on argument processing errors. */
+ if ( errorCount > 0 )
+ abortCompile( 1 );
+
+ /* Make sure we are not writing to the same file as the input file. */
+ if ( inputFileName != 0 && outputFileName != 0 &&
+ strcmp( inputFileName, outputFileName ) == 0 )
+ {
+ error() << "output file \"" << outputFileName <<
+ "\" is the same as the input file" << endp;
+ }
+
+ if ( !frontendSpecified )
+ frontend = ReduceBased;
+
+ if ( checkBreadth ) {
+ if ( histogramFn != 0 )
+ loadHistogram();
+ else
+ defaultHistogram();
+ }
+}
+
+char *InputData::readInput( const char *inputFileName )
+{
+ struct stat st;
+ int res = stat( inputFileName, &st );
+ if ( res != 0 ) {
+ error() << inputFileName << ": stat failed: " << strerror(errno) << endl;
+ return 0;
+ }
+
+ std::ifstream in( inputFileName );
+ if ( !in.is_open() ) {
+ error() << inputFileName << ": could not open in force-libragel mode";
+ return 0;
+ }
+
+ char *input = new char[st.st_size+1];
+ in.read( input, st.st_size );
+ if ( in.gcount() != st.st_size ) {
+ error() << inputFileName << ": could not read in force-libragel mode";
+ delete[] input;
+ return 0;
+ }
+ input[st.st_size] = 0;
+
+ return input;
+}
+
+int InputData::main( int argc, const char **argv )
+{
+ int code = 0;
+ try {
+ parseArgs( argc, argv );
+ checkArgs();
+ if ( !generateDot )
+ makeDefaultFileName();
+
+ if ( !process() )
+ abortCompile( 1 );
+ }
+ catch ( const AbortCompile &ac ) {
+ code = ac.code;
+ }
+
+ return code;
+}
+
+int InputData::runFrontend( int argc, const char **argv )
+{
+ if ( !process() )
+ return -1;
+ return 0;
+}
+
+int InputData::runRlhc( int argc, const char **argv )
+{
+ struct colm_program *prg;
+ int exit_status;
+
+ prg = colm_new_program( rlhcSections );
+ colm_set_debug( prg, 0 );
+ colm_run_program( prg, argc, argv );
+ exit_status = colm_delete_program( prg );
+ return exit_status;
+}
+
+/* Run a job (frontend or backend). If we want forks then we return the result
+ * via the process's exit code. otherwise it comes back on the stack. */
+int InputData::runJob( const char *what, IdProcess idProcess, int argc, const char **argv )
+{
+#if defined(HAVE_SYS_WAIT_H)
+ if ( !noFork ) {
+ pid_t pid = fork();
+
+ if ( pid == 0 ) {
+ int es = (this->*idProcess)( argc, argv );
+ exit( es );
+ }
+
+ int status = 0;
+ waitpid( pid, &status, 0 );
+ if ( WIFSIGNALED(status) ) {
+ error() << what << " stopped by signal: " << WTERMSIG(status) << std::endl;
+ return -1;
+ }
+
+ return WEXITSTATUS( status );
+ }
+#endif
+ return (this->*idProcess)( argc, argv );
+}
+
+int InputData::rlhcMain( int argc, const char **argv )
+{
+ parseArgs( argc, argv );
+ checkArgs();
+ makeDefaultFileName();
+ makeTranslateOutputFileName();
+
+ int es = runJob( "frontend", &InputData::runFrontend, 0, 0 );
+
+ if ( es != 0 )
+ return es;
+
+ /* rlhc <input> <output> */
+ const char *_argv[] = { "rlhc",
+ genOutputFileName.c_str(),
+ origOutputFileName.c_str(), 0 };
+
+ return runJob( "rlhc", &InputData::runRlhc, 3, _argv );
+}