author | Adrian Thurston <thurston@colm.net> | 2019-09-09 10:19:58 -0600
---|---|---
committer | Adrian Thurston <thurston@colm.net> | 2019-09-09 10:19:58 -0600
commit | fff52cd5a567ec541cd487b9fee2d89bf9b6f6eb (patch) |
tree | 046a3f0f457343f7c99bf096863daf023a085051 | /test/rlparse.d/case
parent | 2d8e9c3f5c0417d6237c945c50f92bf8d28b32d5 (diff) |
download | colm-fff52cd5a567ec541cd487b9fee2d89bf9b6f6eb.tar.gz |
base (aapl, colm, ragel) test cases building
Diffstat (limited to 'test/rlparse.d/case')
-rw-r--r-- | test/rlparse.d/case/rlparse.h | 224
-rw-r--r-- | test/rlparse.d/case/rlscan--colm-frontend.exp | 891
-rw-r--r-- | test/rlparse.d/case/rlscan--reduce-frontend.exp | 894
-rw-r--r-- | test/rlparse.d/case/rlscan.rl | 1192
4 files changed, 3201 insertions, 0 deletions
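The scanner sources added in this diff allocate every token payload with a small `TokHead` header in front of the character data (`newTokdata`, `linkTokdata`, `clearTokdata`), so all token strings handed to the parser can be released in one sweep when the parse is cleared. Below is a minimal standalone sketch of that intrusive-header pattern, not the committed code; the `TokList` holder is a hypothetical stand-in for the `Parser6` object that owns `tokHead` in the diff.

```cpp
#include <cstring>
#include <iostream>

/* Each token payload lives in one block: [TokHead][payload bytes][NUL].
 * Callers only ever see a pointer just past the header. */
struct TokHead
{
	TokHead *next;
};

struct TokList
{
	TokHead *tokHead = nullptr;   // stand-in for Parser6::tokHead
};

char *newTokdata( int toklen )
{
	char *block = new char[sizeof(TokHead) + toklen + 1];
	return block + sizeof(TokHead);   // hand back the payload pointer
}

void linkTokdata( TokList *list, char *tokdata )
{
	/* Step back over the payload to recover the header, then push it
	 * onto the singly linked list of live token blocks. */
	TokHead *head = (TokHead*)( tokdata - sizeof(TokHead) );
	head->next = list->tokHead;
	list->tokHead = head;
}

void clearTokdata( TokList *list )
{
	while ( list->tokHead != nullptr ) {
		TokHead *next = list->tokHead->next;
		delete[] (char*)list->tokHead;   // frees header and payload together
		list->tokHead = next;
	}
}

int main()
{
	TokList list;
	const char *word = "machine";
	char *tok = newTokdata( (int)strlen( word ) );
	memcpy( tok, word, strlen( word ) + 1 );
	linkTokdata( &list, tok );
	std::cout << "stored token: " << tok << std::endl;
	clearTokdata( &list );
	return 0;
}
```

This is why the committed code repeatedly computes `tokdata - sizeof(TokHead)`: the header address is recovered arithmetically from the payload pointer rather than stored separately.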
diff --git a/test/rlparse.d/case/rlparse.h b/test/rlparse.d/case/rlparse.h new file mode 100644 index 00000000..a2c791e8 --- /dev/null +++ b/test/rlparse.d/case/rlparse.h @@ -0,0 +1,224 @@ +/* Automatically generated by Kelbt from "rlparse.kh". + * + * Parts of this file are copied from Kelbt source covered by the GNU + * GPL. As a special exception, you may use the parts of this file copied + * from Kelbt source without restriction. The remainder is derived from + * "rlparse.kh" and inherits the copyright status of that file. + */ + +#line 1 "rlparse.kh" +/* + * Copyright 2001-2007 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _RLPARSE_H +#define _RLPARSE_H + +#include <iostream> +#include "avltree.h" +#include "parsedata.h" + + +/* Import scanner tokens. */ +#define IMP_Word 128 +#define IMP_Literal 129 +#define IMP_UInt 130 +#define IMP_Define 131 + +struct ParamList; + +struct TokHead +{ + TokHead *next; +}; + +struct Parser6 +{ +#line 102 "rlparse.kh" + + + #line 58 "rlparse.h" + struct Parser6_Block *block; + struct Parser6_LangEl *freshEl; + int freshPos; + struct Parser6_LangEl *pool; + int numRetry; + int numNodes; + struct Parser6_LangEl *stackTop; + struct Parser6_LangEl *lastFinal; + int errCount; + int curs; +#line 105 "rlparse.kh" + + void init(); + int parseLangEl( int type, const Token *token ); + void clear(); + + Parser6( InputData *id, const char *fileName, char *sectionName, + const InputLoc §ionLoc, const HostLang *hostLang, + MinimizeLevel minimizeLevel, + MinimizeOpt minimizeOpt ); + + int token( InputLoc &loc, int tokId, char *tokstart, int toklen ); + void tryMachineDef( const InputLoc &loc, char *name, + MachineDef *machineDef, bool isInstance ); + + /* Report an error encountered by the parser. */ + ostream &parse_error( int tokId, Token &token ); + + ParseData *pd; + + /* The name of the root section, this does not change during an include. 
*/ + char *sectionName; + const HostLang *hostLang; + + NameRef nameRef; + NameRefList nameRefList; + + Vector<bool> exportContext; + + TokHead *tokHead; + ActionParamList *paramList; + + Parser6 *prev, *next; + + void terminateParser(); + + bool parseSubstitutions; +}; + +#line 108 "rlparse.h" +#define TK_Word 128 +#define TK_Literal 129 +#define TK_EndSection 130 +#define TK_UInt 131 +#define TK_Hex 132 +#define TK_DotDot 133 +#define TK_ColonGt 134 +#define TK_ColonGtGt 135 +#define TK_LtColon 136 +#define TK_Arrow 137 +#define TK_DoubleArrow 138 +#define TK_StarStar 139 +#define TK_ColonEquals 140 +#define TK_BarEquals 141 +#define TK_NameSep 142 +#define TK_BarStar 143 +#define TK_DashDash 144 +#define TK_DotDotIndep 145 +#define TK_StartCond 146 +#define TK_AllCond 147 +#define TK_LeavingCond 148 +#define TK_Middle 149 +#define TK_StartGblError 150 +#define TK_AllGblError 151 +#define TK_FinalGblError 152 +#define TK_NotFinalGblError 153 +#define TK_NotStartGblError 154 +#define TK_MiddleGblError 155 +#define TK_StartLocalError 156 +#define TK_AllLocalError 157 +#define TK_FinalLocalError 158 +#define TK_NotFinalLocalError 159 +#define TK_NotStartLocalError 160 +#define TK_MiddleLocalError 161 +#define TK_StartEOF 162 +#define TK_AllEOF 163 +#define TK_FinalEOF 164 +#define TK_NotFinalEOF 165 +#define TK_NotStartEOF 166 +#define TK_MiddleEOF 167 +#define TK_StartToState 168 +#define TK_AllToState 169 +#define TK_FinalToState 170 +#define TK_NotFinalToState 171 +#define TK_NotStartToState 172 +#define TK_MiddleToState 173 +#define TK_StartFromState 174 +#define TK_AllFromState 175 +#define TK_FinalFromState 176 +#define TK_NotFinalFromState 177 +#define TK_NotStartFromState 178 +#define TK_MiddleFromState 179 +#define TK_ColonNfaOpen 180 +#define TK_CloseColon 181 +#define TK_ColonCondOpen 182 +#define TK_ColonCondStarOpen 183 +#define TK_ColonCondPlusOpen 184 +#define TK_ColonNoMaxOpen 185 +#define RE_Slash 186 +#define RE_SqOpen 187 +#define RE_SqOpenNeg 188 +#define RE_SqClose 189 +#define RE_Dot 190 +#define RE_Star 191 +#define RE_Dash 192 +#define RE_Char 193 +#define IL_WhiteSpace 194 +#define IL_Comment 195 +#define IL_Literal 196 +#define IL_Symbol 197 +#define KW_Machine 198 +#define KW_Include 199 +#define KW_Import 200 +#define KW_Write 201 +#define KW_Action 202 +#define KW_AlphType 203 +#define KW_Range 204 +#define KW_GetKey 205 +#define KW_InWhen 206 +#define KW_When 207 +#define KW_OutWhen 208 +#define KW_Eof 209 +#define KW_Err 210 +#define KW_Lerr 211 +#define KW_To 212 +#define KW_From 213 +#define KW_Export 214 +#define KW_PrePush 215 +#define KW_PostPop 216 +#define KW_Length 217 +#define KW_NfaPrePush 218 +#define KW_NfaPostPop 219 +#define KW_Break 220 +#define KW_Exec 221 +#define KW_Hold 222 +#define KW_PChar 223 +#define KW_Char 224 +#define KW_Goto 225 +#define KW_Call 226 +#define KW_Ret 227 +#define KW_CurState 228 +#define KW_TargState 229 +#define KW_Entry 230 +#define KW_Next 231 +#define KW_Variable 232 +#define KW_Access 233 +#define KW_Ncall 234 +#define KW_Nret 235 +#define KW_Nbreak 236 +#define TK_SubstRef 237 +#define Parser6_tk_eof 238 + +#line 144 "rlparse.kh" + +void clearTokdata( Parser6 *parser ); + +#endif diff --git a/test/rlparse.d/case/rlscan--colm-frontend.exp b/test/rlparse.d/case/rlscan--colm-frontend.exp new file mode 100644 index 00000000..37c55280 --- /dev/null +++ b/test/rlparse.d/case/rlscan--colm-frontend.exp @@ -0,0 +1,891 @@ +/* + * Copyright 2006-2007 Adrian Thurston <thurston@complang.org> + * Copyright 2011 Josef Goettgens 
+ */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <iostream> +#include <fstream> +#include <string.h> + +#include "ragel.h" +#include "rlscan.h" +#include "inputdata.h" + +//#define LOG_TOKENS + +using std::ifstream; +using std::istream; +using std::ostream; +using std::endl; + +enum InlineBlockType +{ + CurlyDelimited, + SemiTerminated +}; + +char *newTokdata( int toklen ) +{ + char *tokdata = new char[sizeof(TokHead) + toklen + 1]; + return tokdata + sizeof(TokHead); +} + +void deleteTokdata( char *tokdata ) +{ + if ( tokdata ) + delete[] ( tokdata - sizeof(TokHead) ); +} + +void linkTokdata( Parser6 *parser, char *tokdata ) +{ + TokHead *head = (TokHead*)( tokdata - sizeof(TokHead) ); + head->next = parser->tokHead; + parser->tokHead = head; +} + +void clearTokdata( Parser6 *parser ) +{ + while ( parser->tokHead != 0 ) { + TokHead *next = parser->tokHead->next; + delete[] (char*)parser->tokHead; + parser->tokHead = next; + } +} + +/* + * The Scanner for Importing + */ + + + + +write: data + + +void Scanner::flushImport() +{ + int *p = token_data; + int *pe = token_data + cur_token; + int *eof = 0; + + +write: init +write: exec + + + if ( tok_ts == 0 ) + cur_token = 0; + else { + cur_token = pe - tok_ts; + int ts_offset = tok_ts - token_data; + memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) ); + memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) ); + memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) ); + } +} + +void Scanner::directToParser( Parser6 *toParser, const char *tokFileName, int tokLine, + int tokColumn, int type, char *tokdata, int toklen ) +{ + InputLoc loc; + + #ifdef LOG_TOKENS + cerr << "scanner:" << tokLine << ":" << tokColumn << + ": sending token to the parser " << Parser6_lelNames[type]; + cerr << " " << toklen; + if ( tokdata != 0 ) + cerr << " " << tokdata; + cerr << endl; + #endif + + loc.fileName = tokFileName; + loc.line = tokLine; + loc.col = tokColumn; + + toParser->token( loc, type, tokdata, toklen ); +} + +void Scanner::importToken( int token, char *start, char *end ) +{ + if ( cur_token == max_tokens ) + flushImport(); + + token_data[cur_token] = token; + if ( start == 0 ) { + token_strings[cur_token] = 0; + token_lens[cur_token] = 0; + } + else { + int toklen = end-start; + token_lens[cur_token] = toklen; + token_strings[cur_token] = new char[toklen+1]; + memcpy( token_strings[cur_token], start, toklen ); + token_strings[cur_token][toklen] = 0; + } + cur_token++; +} + +void Scanner::pass() +{ + if ( sectionPass ) + return; + + updateCol(); + + /* If no errors and we are at the bottom of the include stack (the + * source file listed on the command line) then write out the data. 
*/ + if ( includeDepth == 0 && id->machineSpec == 0 && id->machineName == 0 ) + id->curItem->data.write( ts, te-ts ); +} + +void Scanner::pass( int token, char *start, char *end ) +{ + if ( sectionPass ) + return; + + if ( importMachines ) + importToken( token, start, end ); + + pass(); +} + +/* + * The scanner for processing sections, includes, imports, etc. + */ + + +write: data + + +void Scanner::init( ) +{ + +write: init + +} + +bool Scanner::active() +{ + if ( ignoreSection ) + return false; + + if ( parser == 0 && ! parserExistsError ) { + id->error(scan_loc()) << "this specification has no name, nor does any previous" + " specification" << endl; + parserExistsError = true; + } + + if ( parser == 0 ) + return false; + + return true; +} + +InputLoc Scanner::scan_loc() +{ + return makeInputLoc( fileName, line, column ); +} + +void Scanner::updateCol() +{ + char *from = lastnl; + if ( from == 0 ) + from = ts; + column += te - from; + lastnl = 0; +} + +void Scanner::handleMachine() +{ + if ( sectionPass ) { + /* Assign a name to the machine. */ + char *machine = word; + + SectionDictEl *sdEl = id->sectionDict.find( machine ); + if ( sdEl == 0 ) { + sdEl = new SectionDictEl( machine ); + sdEl->value = new Section( machine ); + id->sectionDict.insert( sdEl ); + } + + section = sdEl->value; + } + else { + + /* Assign a name to the machine. */ + char *machine = word; + + if ( !importMachines && inclSectionTarg == 0 ) { + ignoreSection = false; + + ParserDictEl *pdEl = id->parserDict.find( machine ); + if ( pdEl == 0 ) { + pdEl = new ParserDictEl( machine ); + pdEl->value = new Parser6( id, fileName, machine, sectionLoc, + id->hostLang, id->minimizeLevel, id->minimizeOpt ); + pdEl->value->init(); + id->parserDict.insert( pdEl ); + id->parserList.append( pdEl->value ); + + /* Also into the parse data dict. This is the new style. */ + ParseDataDictEl *pddEl = new ParseDataDictEl( machine ); + pddEl->value = pdEl->value->pd; + id->parseDataDict.insert( pddEl ); + id->parseDataList.append( pddEl->value ); + } + + parser = pdEl->value; + } + else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) { + /* found include target */ + ignoreSection = false; + parser = inclToParser; + } + else { + /* ignoring section */ + ignoreSection = true; + parser = 0; + } + } +} + +void Scanner::handleInclude() +{ + if ( sectionPass ) + return; + + if ( active() ) { + char *inclSectionName = word; + const char **includeChecks = 0; + + /* Implement defaults for the input file and section name. */ + if ( inclSectionName == 0 ) + inclSectionName = parser->sectionName; + + if ( lit != 0 ) { + long length = 0; + bool caseInsensitive = false; + char *data = prepareLitString( id, InputLoc(), lit, lit_len, length, caseInsensitive ); + + includeChecks = parser->pd->id->makeIncludePathChecks( fileName, data ); + } + else { + char *test = new char[strlen(fileName)+1]; + strcpy( test, fileName ); + + includeChecks = new const char*[2]; + + includeChecks[0] = test; + includeChecks[1] = 0; + } + + long found = 0; + ifstream *inFile = parser->pd->id->tryOpenInclude( includeChecks, found ); + if ( inFile == 0 ) { + id->error(scan_loc()) << "include: failed to locate file" << endl; + const char **tried = includeChecks; + while ( *tried != 0 ) + id->error(scan_loc()) << "include: attempted: \"" << *tried++ << '\"' << endl; + } + else { + /* Don't include anything that's already been included. 
*/ + if ( !parser->pd->duplicateInclude( includeChecks[found], inclSectionName ) ) { + parser->pd->includeHistory.push_back( IncludeHistoryItem( + includeChecks[found], inclSectionName ) ); + + Scanner scanner( id, includeChecks[found], *inFile, parser, + inclSectionName, includeDepth+1, false ); + scanner.do_scan( ); + } + + delete inFile; + } + } +} + +void Scanner::handleImport() +{ + if ( sectionPass ) + return; + + if ( active() ) { + long length = 0; + bool caseInsensitive = false; + char *data = prepareLitString( id, InputLoc(), lit, lit_len, length, caseInsensitive ); + + const char **importChecks = parser->pd->id->makeIncludePathChecks( fileName, data ); + + /* Open the input file for reading. */ + long found = 0; + ifstream *inFile = parser->pd->id->tryOpenInclude( importChecks, found ); + if ( inFile == 0 ) { + id->error(scan_loc()) << "import: could not open import file " << + "for reading" << endl; + const char **tried = importChecks; + while ( *tried != 0 ) + id->error(scan_loc()) << "import: attempted: \"" << *tried++ << '\"' << endl; + } + + Scanner scanner( id, importChecks[found], *inFile, parser, + 0, includeDepth+1, true ); + scanner.do_scan( ); + scanner.importToken( 0, 0, 0 ); + scanner.flushImport(); + delete inFile; + } +} + + + +void Scanner::token( int type, char c ) +{ + token( type, &c, &c + 1 ); +} + +void Scanner::token( int type ) +{ + token( type, 0, 0 ); +} + + +void Scanner::token( int type, char *start, char *end ) +{ + char *tokdata = 0; + int toklen = 0; + if ( start != 0 ) { + toklen = end-start; + tokdata = newTokdata( toklen + 1 ); + memcpy( tokdata, start, toklen ); + tokdata[toklen] = 0; + } + + processToken( type, tokdata, toklen ); +} + +void Scanner::processToken( int type, char *tokdata, int toklen ) +{ + int *p, *pe, *eof; + + if ( type < 0 ) + p = pe = eof = 0; + else { + p = &type; + pe = &type + 1; + eof = 0; + } + + +write: exec + + + updateCol(); + + /* Record the last token for use in controlling the scan of subsequent + * tokens. */ + lastToken = type; +} + +void Scanner::startSection( ) +{ + parserExistsError = false; + + sectionLoc.fileName = fileName; + sectionLoc.line = line; + sectionLoc.col = column; +} + +void Scanner::endSection( ) +{ + /* Execute the eof actions for the section parser. */ + processToken( -1, 0, 0 ); + + if ( sectionPass ) { + InputItem *inputItem = new InputItem; + inputItem->type = InputItem::EndSection; + inputItem->loc.fileName = fileName; + inputItem->loc.line = line; + inputItem->loc.col = column; + id->inputItems.append( inputItem ); + if ( section != 0 ) { + inputItem->section = section; + section->lastReference = inputItem; + } + + if ( includeDepth == 0 ) { + if ( id->machineSpec == 0 && id->machineName == 0 ) { + /* The end section may include a newline on the end, so + * we use the last line, which will count the newline. */ + InputItem *inputItem = new InputItem; + inputItem->type = InputItem::HostData; + inputItem->loc.fileName = fileName; + inputItem->loc.line = line; + inputItem->loc.col = column; + id->inputItems.append( inputItem ); + } + } + } + else { + /* Close off the section with the parser. 
*/ + if ( includeDepth == 0 && active() ) { + InputLoc loc; + loc.fileName = fileName; + loc.line = line; + loc.col = column; + + parser->token( loc, TK_EndSection, 0, 0 ); + + id->curItem = id->curItem->next; + + if ( parser != 0 ) { + id->curItem->pd = parser->pd; + id->curItem->parser = parser; + } + + id->checkLastRef( id->curItem ); + } + + if ( includeDepth == 0 ) { + if ( id->machineSpec == 0 && id->machineName == 0 ) { + id->curItem = id->curItem->next; + id->checkLastRef( id->curItem ); + } + } + } +} + + + + +write: data + + +void Scanner::do_scan() +{ + int bufsize = 8; + char *buf = new char[bufsize]; + int cs, act, have = 0; + int top; + + /* The stack is two deep, one level for going into ragel defs from the main + * machines which process outside code, and another for going into or literals + * from either a ragel spec, or a regular expression. */ + int stack[2]; + int curly_count = 0; + bool execute = true; + bool singleLineSpec = false; + InlineBlockType inlineBlockType = CurlyDelimited; + + line = 1; + column = 1; + lastnl = 0; + + /* Init the section parser and the character scanner. */ + init(); + +write: init + + + /* Set up the start state. FIXME: After 5.20 is released the nocs write + * init option should be used, the main machine eliminated and this statement moved + * above the write init. */ + cs = rlscan_en_main; + + while ( execute ) { + char *p = buf + have; + int space = bufsize - have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. Grow it. */ + bufsize = bufsize * 2; + char *newbuf = new char[bufsize]; + + /* Recompute p and space. */ + p = newbuf + have; + space = bufsize - have; + + /* Patch up pointers possibly in use. */ + if ( ts != 0 ) + ts = newbuf + ( ts - buf ); + te = newbuf + ( te - buf ); + + /* Copy the new buffer in. */ + memcpy( newbuf, buf, have ); + delete[] buf; + buf = newbuf; + } + + input.read( p, space ); + int len = input.gcount(); + char *pe = p + len; + + /* If we see eof then append the eof var. */ + char *eof = 0; + if ( len == 0 ) { + eof = pe; + execute = false; + } + + +write: exec + + + /* Check if we failed. */ + if ( cs == rlscan_error ) { + /* Machine failed before finding a token. I'm not yet sure if this + * is reachable. */ + id->error(scan_loc()) << "scanner error" << endl; + id->abortCompile( 1 ); + } + + /* Decide if we need to preserve anything. */ + char *preserve = ts; + + /* Now set up the prefix. */ + if ( preserve == 0 ) + have = 0; + else { + /* There is data that needs to be shifted over. 
*/ + have = pe - preserve; + memmove( buf, preserve, have ); + unsigned int shiftback = preserve - buf; + if ( ts != 0 ) + ts -= shiftback; + te -= shiftback; + + preserve = buf; + } + } + + delete[] buf; +} + +machine name: inline_token_scan +IL_Comment = lit +IL_Literal = lit +IL_Symbol = lit +IL_WhiteSpace = lit +IMP_Define = lit +IMP_Literal = lit +IMP_UInt = lit +IMP_Word = lit +KW_Access = lit +KW_Action = lit +KW_AlphType = lit +KW_Break = lit +KW_Call = lit +KW_Char = lit +KW_CurState = lit +KW_Entry = lit +KW_Eof = lit +KW_Err = lit +KW_Exec = lit +KW_Export = lit +KW_From = lit +KW_GetKey = lit +KW_Goto = lit +KW_Hold = lit +KW_Import = lit +KW_InWhen = lit +KW_Include = lit +KW_Length = lit +KW_Lerr = lit +KW_Machine = lit +KW_Nbreak = lit +KW_Ncall = lit +KW_Next = lit +KW_NfaPostPop = lit +KW_NfaPrePush = lit +KW_Nret = lit +KW_OutWhen = lit +KW_PChar = lit +KW_PostPop = lit +KW_PrePush = lit +KW_Range = lit +KW_Ret = lit +KW_TargState = lit +KW_To = lit +KW_Variable = lit +KW_When = lit +KW_Write = lit +Parser6_tk_eof = lit +RE_Char = lit +RE_Dash = lit +RE_Dot = lit +RE_Slash = lit +RE_SqClose = lit +RE_SqOpen = lit +RE_SqOpenNeg = lit +RE_Star = lit +TK_AllCond = lit +TK_AllEOF = lit +TK_AllFromState = lit +TK_AllGblError = lit +TK_AllLocalError = lit +TK_AllToState = lit +TK_Arrow = lit +TK_BarEquals = lit +TK_BarStar = lit +TK_CloseColon = lit +TK_ColonCondOpen = lit +TK_ColonCondPlusOpen = lit +TK_ColonCondStarOpen = lit +TK_ColonEquals = lit +TK_ColonGt = lit +TK_ColonGtGt = lit +TK_ColonNfaOpen = lit +TK_ColonNoMaxOpen = lit +TK_DashDash = lit +TK_DotDot = lit +TK_DotDotIndep = lit +TK_DoubleArrow = lit +TK_EndSection = lit +TK_FinalEOF = lit +TK_FinalFromState = lit +TK_FinalGblError = lit +TK_FinalLocalError = lit +TK_FinalToState = lit +TK_Hex = lit +TK_LeavingCond = lit +TK_Literal = lit +TK_LtColon = lit +TK_Middle = lit +TK_MiddleEOF = lit +TK_MiddleFromState = lit +TK_MiddleGblError = lit +TK_MiddleLocalError = lit +TK_MiddleToState = lit +TK_NameSep = lit +TK_NotFinalEOF = lit +TK_NotFinalFromState = lit +TK_NotFinalGblError = lit +TK_NotFinalLocalError = lit +TK_NotFinalToState = lit +TK_NotStartEOF = lit +TK_NotStartFromState = lit +TK_NotStartGblError = lit +TK_NotStartLocalError = lit +TK_NotStartToState = lit +TK_StarStar = lit +TK_StartCond = lit +TK_StartEOF = lit +TK_StartFromState = lit +TK_StartGblError = lit +TK_StartLocalError = lit +TK_StartToState = lit +TK_SubstRef = lit +TK_UInt = lit +TK_Word = lit +alnum = builtin +alpha = builtin +any = builtin +ascii = builtin +cntrl = builtin +digit = builtin +empty = builtin +extend = builtin +graph = builtin +lower = builtin +main = |* ref . ref . ref; ref . lit . ref; ref . ref . ref; ref . lit . 
ref; ref; *| +null = builtin +print = builtin +punct = builtin +space = builtin +upper = builtin +xdigit = builtin +zlen = builtin +machine name: section_parse +IL_Comment = lit +IL_Literal = lit +IL_Symbol = lit +IL_WhiteSpace = lit +IMP_Define = lit +IMP_Literal = lit +IMP_UInt = lit +IMP_Word = lit +KW_Access = lit +KW_Action = lit +KW_AlphType = lit +KW_Break = lit +KW_Call = lit +KW_Char = lit +KW_CurState = lit +KW_Entry = lit +KW_Eof = lit +KW_Err = lit +KW_Exec = lit +KW_Export = lit +KW_From = lit +KW_GetKey = lit +KW_Goto = lit +KW_Hold = lit +KW_Import = lit +KW_InWhen = lit +KW_Include = lit +KW_Length = lit +KW_Lerr = lit +KW_Machine = lit +KW_Nbreak = lit +KW_Ncall = lit +KW_Next = lit +KW_NfaPostPop = lit +KW_NfaPrePush = lit +KW_Nret = lit +KW_OutWhen = lit +KW_PChar = lit +KW_PostPop = lit +KW_PrePush = lit +KW_Range = lit +KW_Ret = lit +KW_TargState = lit +KW_To = lit +KW_Variable = lit +KW_When = lit +KW_Write = lit +Parser6_tk_eof = lit +RE_Char = lit +RE_Dash = lit +RE_Dot = lit +RE_Slash = lit +RE_SqClose = lit +RE_SqOpen = lit +RE_SqOpenNeg = lit +RE_Star = lit +TK_AllCond = lit +TK_AllEOF = lit +TK_AllFromState = lit +TK_AllGblError = lit +TK_AllLocalError = lit +TK_AllToState = lit +TK_Arrow = lit +TK_BarEquals = lit +TK_BarStar = lit +TK_CloseColon = lit +TK_ColonCondOpen = lit +TK_ColonCondPlusOpen = lit +TK_ColonCondStarOpen = lit +TK_ColonEquals = lit +TK_ColonGt = lit +TK_ColonGtGt = lit +TK_ColonNfaOpen = lit +TK_ColonNoMaxOpen = lit +TK_DashDash = lit +TK_DotDot = lit +TK_DotDotIndep = lit +TK_DoubleArrow = lit +TK_EndSection = lit +TK_FinalEOF = lit +TK_FinalFromState = lit +TK_FinalGblError = lit +TK_FinalLocalError = lit +TK_FinalToState = lit +TK_Hex = lit +TK_LeavingCond = lit +TK_Literal = lit +TK_LtColon = lit +TK_Middle = lit +TK_MiddleEOF = lit +TK_MiddleFromState = lit +TK_MiddleGblError = lit +TK_MiddleLocalError = lit +TK_MiddleToState = lit +TK_NameSep = lit +TK_NotFinalEOF = lit +TK_NotFinalFromState = lit +TK_NotFinalGblError = lit +TK_NotFinalLocalError = lit +TK_NotFinalToState = lit +TK_NotStartEOF = lit +TK_NotStartFromState = lit +TK_NotStartGblError = lit +TK_NotStartLocalError = lit +TK_NotStartToState = lit +TK_StarStar = lit +TK_StartCond = lit +TK_StartEOF = lit +TK_StartFromState = lit +TK_StartGblError = lit +TK_StartLocalError = lit +TK_StartToState = lit +TK_SubstRef = lit +TK_UInt = lit +TK_Word = lit +alnum = builtin +alpha = builtin +any = builtin +ascii = builtin +cntrl = builtin +digit = builtin +empty = builtin +everything_else = ^( ref | ref | ref | ref ) +extend = builtin +graph = builtin +import_stmt = ( ref . ref . lit ) +include_names = ( ref . ( ref )? | ref ) +include_stmt = ( ref . ref . lit ) +lower = builtin +machine_stmt = ( ref . ref . lit ) +main = ( ref | ref | ref | ref | ref )* +null = builtin +print = builtin +punct = builtin +space = builtin +upper = builtin +write_stmt = ( ref . ( ref )+ . lit ) +xdigit = builtin +zlen = builtin +machine name: rlscan +EOF = lit +NL = lit +alnum = builtin +alpha = builtin +any = builtin +ascii = builtin +c_comment = lit . ( ref | ref )* :>> lit +c_cpp_comment = ref | ref +cntrl = builtin +cpp_comment = lit . or_expr* . ref +d_literal = lit . ( or_expr | ref | lit . ( ref | ref ) )* . lit +digit = builtin +empty = builtin +extend = builtin +graph = builtin +hex_number = lit . or_expr+ +host_re_literal = lit . ( or_expr | ref | lit . ( ref | ref ) )* . lit +ident = ( ref | lit ) . 
( ref | ref | lit )* +inline_code = |* lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; ref; ref; ref; ( ref | ref ); ref+; ref; lit; lit; lit . or_expr . or_expr*; or_expr; or_expr; lit; lit; ref; ref; *| +lower = builtin +main = |* lit; ref; ref; ref; ( ref | ref ); lit; lit; ref+; ref; ref; *| +null = builtin +number = ref+ +ocaml_ident = ( ref | lit ) . ( ref | ref | lit )* . lit? +or_literal = |* lit; lit; lit; lit; lit; lit; lit; lit; lit; lit . ref; lit; lit; ref; or_expr; *| +parser_def = |* lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; ref; ref; ref; ( ref | ref ) . or_expr?; lit; lit; lit; ref; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; or_expr+; ref; lit; ref; ref; *| +pound_comment = lit . or_expr* . ref +print = builtin +punct = builtin +ragel_re_literal = |* lit; lit; lit; lit; lit; lit; lit; lit; lit; lit . ref; lit . or_expr?; lit; lit; lit; lit; ref; or_expr; *| +ruby_comment = lit . or_expr* . ref +s_literal = lit . ( or_expr | ref | lit . ( ref | ref ) )* . lit +space = builtin +upper = builtin +whitespace = or_expr | ref +write_statement = |* ref; or_expr+; lit; ref; *| +xdigit = builtin +zlen = builtin diff --git a/test/rlparse.d/case/rlscan--reduce-frontend.exp b/test/rlparse.d/case/rlscan--reduce-frontend.exp new file mode 100644 index 00000000..5ae543b7 --- /dev/null +++ b/test/rlparse.d/case/rlscan--reduce-frontend.exp @@ -0,0 +1,894 @@ +/* + * Copyright 2006-2007 Adrian Thurston <thurston@complang.org> + * Copyright 2011 Josef Goettgens + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <iostream> +#include <fstream> +#include <string.h> + +#include "ragel.h" +#include "rlscan.h" +#include "inputdata.h" + +//#define LOG_TOKENS + +using std::ifstream; +using std::istream; +using std::ostream; +using std::endl; + +enum InlineBlockType +{ + CurlyDelimited, + SemiTerminated +}; + +char *newTokdata( int toklen ) +{ + char *tokdata = new char[sizeof(TokHead) + toklen + 1]; + return tokdata + sizeof(TokHead); +} + +void deleteTokdata( char *tokdata ) +{ + if ( tokdata ) + delete[] ( tokdata - sizeof(TokHead) ); +} + +void linkTokdata( Parser6 *parser, char *tokdata ) +{ + TokHead *head = (TokHead*)( tokdata - sizeof(TokHead) ); + head->next = parser->tokHead; + parser->tokHead = head; +} + +void clearTokdata( Parser6 *parser ) +{ + while ( parser->tokHead != 0 ) { + TokHead *next = parser->tokHead->next; + delete[] (char*)parser->tokHead; + parser->tokHead = next; + } +} + +/* + * The Scanner for Importing + */ + + + + + +write: data + + +void Scanner::flushImport() +{ + int *p = token_data; + int *pe = token_data + cur_token; + int *eof = 0; + + +write: init +write: exec + + + if ( tok_ts == 0 ) + cur_token = 0; + else { + cur_token = pe - tok_ts; + int ts_offset = tok_ts - token_data; + memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) ); + memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) ); + memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) ); + } +} + +void Scanner::directToParser( Parser6 *toParser, const char *tokFileName, int tokLine, + int tokColumn, int type, char *tokdata, int toklen ) +{ + InputLoc loc; + + #ifdef LOG_TOKENS + cerr << "scanner:" << tokLine << ":" << tokColumn << + ": sending token to the parser " << Parser6_lelNames[type]; + cerr << " " << toklen; + if ( tokdata != 0 ) + cerr << " " << tokdata; + cerr << endl; + #endif + + loc.fileName = tokFileName; + loc.line = tokLine; + loc.col = tokColumn; + + toParser->token( loc, type, tokdata, toklen ); +} + +void Scanner::importToken( int token, char *start, char *end ) +{ + if ( cur_token == max_tokens ) + flushImport(); + + token_data[cur_token] = token; + if ( start == 0 ) { + token_strings[cur_token] = 0; + token_lens[cur_token] = 0; + } + else { + int toklen = end-start; + token_lens[cur_token] = toklen; + token_strings[cur_token] = new char[toklen+1]; + memcpy( token_strings[cur_token], start, toklen ); + token_strings[cur_token][toklen] = 0; + } + cur_token++; +} + +void Scanner::pass() +{ + if ( sectionPass ) + return; + + updateCol(); + + /* If no errors and we are at the bottom of the include stack (the + * source file listed on the command line) then write out the data. */ + if ( includeDepth == 0 && id->machineSpec == 0 && id->machineName == 0 ) + id->curItem->data.write( ts, te-ts ); +} + +void Scanner::pass( int token, char *start, char *end ) +{ + if ( sectionPass ) + return; + + if ( importMachines ) + importToken( token, start, end ); + + pass(); +} + +/* + * The scanner for processing sections, includes, imports, etc. + */ + + +write: data + + +void Scanner::init( ) +{ + +write: init + +} + +bool Scanner::active() +{ + if ( ignoreSection ) + return false; + + if ( parser == 0 && ! 
parserExistsError ) { + id->error(scan_loc()) << "this specification has no name, nor does any previous" + " specification" << endl; + parserExistsError = true; + } + + if ( parser == 0 ) + return false; + + return true; +} + +InputLoc Scanner::scan_loc() +{ + return makeInputLoc( fileName, line, column ); +} + +void Scanner::updateCol() +{ + char *from = lastnl; + if ( from == 0 ) + from = ts; + column += te - from; + lastnl = 0; +} + +void Scanner::handleMachine() +{ + if ( sectionPass ) { + /* Assign a name to the machine. */ + char *machine = word; + + SectionDictEl *sdEl = id->sectionDict.find( machine ); + if ( sdEl == 0 ) { + sdEl = new SectionDictEl( machine ); + sdEl->value = new Section( machine ); + id->sectionDict.insert( sdEl ); + } + + section = sdEl->value; + } + else { + + /* Assign a name to the machine. */ + char *machine = word; + + if ( !importMachines && inclSectionTarg == 0 ) { + ignoreSection = false; + + ParserDictEl *pdEl = id->parserDict.find( machine ); + if ( pdEl == 0 ) { + pdEl = new ParserDictEl( machine ); + pdEl->value = new Parser6( id, fileName, machine, sectionLoc, + id->hostLang, id->minimizeLevel, id->minimizeOpt ); + pdEl->value->init(); + id->parserDict.insert( pdEl ); + id->parserList.append( pdEl->value ); + + /* Also into the parse data dict. This is the new style. */ + ParseDataDictEl *pddEl = new ParseDataDictEl( machine ); + pddEl->value = pdEl->value->pd; + id->parseDataDict.insert( pddEl ); + id->parseDataList.append( pddEl->value ); + } + + parser = pdEl->value; + } + else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) { + /* found include target */ + ignoreSection = false; + parser = inclToParser; + } + else { + /* ignoring section */ + ignoreSection = true; + parser = 0; + } + } +} + +void Scanner::handleInclude() +{ + if ( sectionPass ) + return; + + if ( active() ) { + char *inclSectionName = word; + const char **includeChecks = 0; + + /* Implement defaults for the input file and section name. */ + if ( inclSectionName == 0 ) + inclSectionName = parser->sectionName; + + if ( lit != 0 ) { + long length = 0; + bool caseInsensitive = false; + char *data = prepareLitString( id, InputLoc(), lit, lit_len, length, caseInsensitive ); + + includeChecks = parser->pd->id->makeIncludePathChecks( fileName, data ); + } + else { + char *test = new char[strlen(fileName)+1]; + strcpy( test, fileName ); + + includeChecks = new const char*[2]; + + includeChecks[0] = test; + includeChecks[1] = 0; + } + + long found = 0; + ifstream *inFile = parser->pd->id->tryOpenInclude( includeChecks, found ); + if ( inFile == 0 ) { + id->error(scan_loc()) << "include: failed to locate file" << endl; + const char **tried = includeChecks; + while ( *tried != 0 ) + id->error(scan_loc()) << "include: attempted: \"" << *tried++ << '\"' << endl; + } + else { + /* Don't include anything that's already been included. 
*/ + if ( !parser->pd->duplicateInclude( includeChecks[found], inclSectionName ) ) { + parser->pd->includeHistory.push_back( IncludeHistoryItem( + includeChecks[found], inclSectionName ) ); + + Scanner scanner( id, includeChecks[found], *inFile, parser, + inclSectionName, includeDepth+1, false ); + scanner.do_scan( ); + } + + delete inFile; + } + } +} + +void Scanner::handleImport() +{ + if ( sectionPass ) + return; + + if ( active() ) { + long length = 0; + bool caseInsensitive = false; + char *data = prepareLitString( id, InputLoc(), lit, lit_len, length, caseInsensitive ); + + const char **importChecks = parser->pd->id->makeIncludePathChecks( fileName, data ); + + /* Open the input file for reading. */ + long found = 0; + ifstream *inFile = parser->pd->id->tryOpenInclude( importChecks, found ); + if ( inFile == 0 ) { + id->error(scan_loc()) << "import: could not open import file " << + "for reading" << endl; + const char **tried = importChecks; + while ( *tried != 0 ) + id->error(scan_loc()) << "import: attempted: \"" << *tried++ << '\"' << endl; + } + + Scanner scanner( id, importChecks[found], *inFile, parser, + 0, includeDepth+1, true ); + scanner.do_scan( ); + scanner.importToken( 0, 0, 0 ); + scanner.flushImport(); + delete inFile; + } +} + + + + +void Scanner::token( int type, char c ) +{ + token( type, &c, &c + 1 ); +} + +void Scanner::token( int type ) +{ + token( type, 0, 0 ); +} + + +void Scanner::token( int type, char *start, char *end ) +{ + char *tokdata = 0; + int toklen = 0; + if ( start != 0 ) { + toklen = end-start; + tokdata = newTokdata( toklen + 1 ); + memcpy( tokdata, start, toklen ); + tokdata[toklen] = 0; + } + + processToken( type, tokdata, toklen ); +} + +void Scanner::processToken( int type, char *tokdata, int toklen ) +{ + int *p, *pe, *eof; + + if ( type < 0 ) + p = pe = eof = 0; + else { + p = &type; + pe = &type + 1; + eof = 0; + } + + +write: exec + + + updateCol(); + + /* Record the last token for use in controlling the scan of subsequent + * tokens. */ + lastToken = type; +} + +void Scanner::startSection( ) +{ + parserExistsError = false; + + sectionLoc.fileName = fileName; + sectionLoc.line = line; + sectionLoc.col = column; +} + +void Scanner::endSection( ) +{ + /* Execute the eof actions for the section parser. */ + processToken( -1, 0, 0 ); + + if ( sectionPass ) { + InputItem *inputItem = new InputItem; + inputItem->type = InputItem::EndSection; + inputItem->loc.fileName = fileName; + inputItem->loc.line = line; + inputItem->loc.col = column; + id->inputItems.append( inputItem ); + if ( section != 0 ) { + inputItem->section = section; + section->lastReference = inputItem; + } + + if ( includeDepth == 0 ) { + if ( id->machineSpec == 0 && id->machineName == 0 ) { + /* The end section may include a newline on the end, so + * we use the last line, which will count the newline. */ + InputItem *inputItem = new InputItem; + inputItem->type = InputItem::HostData; + inputItem->loc.fileName = fileName; + inputItem->loc.line = line; + inputItem->loc.col = column; + id->inputItems.append( inputItem ); + } + } + } + else { + /* Close off the section with the parser. 
*/ + if ( includeDepth == 0 && active() ) { + InputLoc loc; + loc.fileName = fileName; + loc.line = line; + loc.col = column; + + parser->token( loc, TK_EndSection, 0, 0 ); + + id->curItem = id->curItem->next; + + if ( parser != 0 ) { + id->curItem->pd = parser->pd; + id->curItem->parser = parser; + } + + id->checkLastRef( id->curItem ); + } + + if ( includeDepth == 0 ) { + if ( id->machineSpec == 0 && id->machineName == 0 ) { + id->curItem = id->curItem->next; + id->checkLastRef( id->curItem ); + } + } + } +} + + + + + +write: data + + +void Scanner::do_scan() +{ + int bufsize = 8; + char *buf = new char[bufsize]; + int cs, act, have = 0; + int top; + + /* The stack is two deep, one level for going into ragel defs from the main + * machines which process outside code, and another for going into or literals + * from either a ragel spec, or a regular expression. */ + int stack[2]; + int curly_count = 0; + bool execute = true; + bool singleLineSpec = false; + InlineBlockType inlineBlockType = CurlyDelimited; + + line = 1; + column = 1; + lastnl = 0; + + /* Init the section parser and the character scanner. */ + init(); + +write: init + + + /* Set up the start state. FIXME: After 5.20 is released the nocs write + * init option should be used, the main machine eliminated and this statement moved + * above the write init. */ + cs = rlscan_en_main; + + while ( execute ) { + char *p = buf + have; + int space = bufsize - have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. Grow it. */ + bufsize = bufsize * 2; + char *newbuf = new char[bufsize]; + + /* Recompute p and space. */ + p = newbuf + have; + space = bufsize - have; + + /* Patch up pointers possibly in use. */ + if ( ts != 0 ) + ts = newbuf + ( ts - buf ); + te = newbuf + ( te - buf ); + + /* Copy the new buffer in. */ + memcpy( newbuf, buf, have ); + delete[] buf; + buf = newbuf; + } + + input.read( p, space ); + int len = input.gcount(); + char *pe = p + len; + + /* If we see eof then append the eof var. */ + char *eof = 0; + if ( len == 0 ) { + eof = pe; + execute = false; + } + + +write: exec + + + /* Check if we failed. */ + if ( cs == rlscan_error ) { + /* Machine failed before finding a token. I'm not yet sure if this + * is reachable. */ + id->error(scan_loc()) << "scanner error" << endl; + id->abortCompile( 1 ); + } + + /* Decide if we need to preserve anything. */ + char *preserve = ts; + + /* Now set up the prefix. */ + if ( preserve == 0 ) + have = 0; + else { + /* There is data that needs to be shifted over. 
*/ + have = pe - preserve; + memmove( buf, preserve, have ); + unsigned int shiftback = preserve - buf; + if ( ts != 0 ) + ts -= shiftback; + te -= shiftback; + + preserve = buf; + } + } + + delete[] buf; +} + +machine name: inline_token_scan +IL_Comment = lit +IL_Literal = lit +IL_Symbol = lit +IL_WhiteSpace = lit +IMP_Define = lit +IMP_Literal = lit +IMP_UInt = lit +IMP_Word = lit +KW_Access = lit +KW_Action = lit +KW_AlphType = lit +KW_Break = lit +KW_Call = lit +KW_Char = lit +KW_CurState = lit +KW_Entry = lit +KW_Eof = lit +KW_Err = lit +KW_Exec = lit +KW_Export = lit +KW_From = lit +KW_GetKey = lit +KW_Goto = lit +KW_Hold = lit +KW_Import = lit +KW_InWhen = lit +KW_Include = lit +KW_Length = lit +KW_Lerr = lit +KW_Machine = lit +KW_Nbreak = lit +KW_Ncall = lit +KW_Next = lit +KW_NfaPostPop = lit +KW_NfaPrePush = lit +KW_Nret = lit +KW_OutWhen = lit +KW_PChar = lit +KW_PostPop = lit +KW_PrePush = lit +KW_Range = lit +KW_Ret = lit +KW_TargState = lit +KW_To = lit +KW_Variable = lit +KW_When = lit +KW_Write = lit +Parser6_tk_eof = lit +RE_Char = lit +RE_Dash = lit +RE_Dot = lit +RE_Slash = lit +RE_SqClose = lit +RE_SqOpen = lit +RE_SqOpenNeg = lit +RE_Star = lit +TK_AllCond = lit +TK_AllEOF = lit +TK_AllFromState = lit +TK_AllGblError = lit +TK_AllLocalError = lit +TK_AllToState = lit +TK_Arrow = lit +TK_BarEquals = lit +TK_BarStar = lit +TK_CloseColon = lit +TK_ColonCondOpen = lit +TK_ColonCondPlusOpen = lit +TK_ColonCondStarOpen = lit +TK_ColonEquals = lit +TK_ColonGt = lit +TK_ColonGtGt = lit +TK_ColonNfaOpen = lit +TK_ColonNoMaxOpen = lit +TK_DashDash = lit +TK_DotDot = lit +TK_DotDotIndep = lit +TK_DoubleArrow = lit +TK_EndSection = lit +TK_FinalEOF = lit +TK_FinalFromState = lit +TK_FinalGblError = lit +TK_FinalLocalError = lit +TK_FinalToState = lit +TK_Hex = lit +TK_LeavingCond = lit +TK_Literal = lit +TK_LtColon = lit +TK_Middle = lit +TK_MiddleEOF = lit +TK_MiddleFromState = lit +TK_MiddleGblError = lit +TK_MiddleLocalError = lit +TK_MiddleToState = lit +TK_NameSep = lit +TK_NotFinalEOF = lit +TK_NotFinalFromState = lit +TK_NotFinalGblError = lit +TK_NotFinalLocalError = lit +TK_NotFinalToState = lit +TK_NotStartEOF = lit +TK_NotStartFromState = lit +TK_NotStartGblError = lit +TK_NotStartLocalError = lit +TK_NotStartToState = lit +TK_StarStar = lit +TK_StartCond = lit +TK_StartEOF = lit +TK_StartFromState = lit +TK_StartGblError = lit +TK_StartLocalError = lit +TK_StartToState = lit +TK_SubstRef = lit +TK_UInt = lit +TK_Word = lit +alnum = builtin +alpha = builtin +any = builtin +ascii = builtin +cntrl = builtin +digit = builtin +empty = builtin +extend = builtin +graph = builtin +lower = builtin +main = |* ref . ref . ref; ref . lit . ref; ref . ref . ref; ref . lit . 
ref; ref; *| +null = builtin +print = builtin +punct = builtin +space = builtin +upper = builtin +xdigit = builtin +zlen = builtin +machine name: section_parse +IL_Comment = lit +IL_Literal = lit +IL_Symbol = lit +IL_WhiteSpace = lit +IMP_Define = lit +IMP_Literal = lit +IMP_UInt = lit +IMP_Word = lit +KW_Access = lit +KW_Action = lit +KW_AlphType = lit +KW_Break = lit +KW_Call = lit +KW_Char = lit +KW_CurState = lit +KW_Entry = lit +KW_Eof = lit +KW_Err = lit +KW_Exec = lit +KW_Export = lit +KW_From = lit +KW_GetKey = lit +KW_Goto = lit +KW_Hold = lit +KW_Import = lit +KW_InWhen = lit +KW_Include = lit +KW_Length = lit +KW_Lerr = lit +KW_Machine = lit +KW_Nbreak = lit +KW_Ncall = lit +KW_Next = lit +KW_NfaPostPop = lit +KW_NfaPrePush = lit +KW_Nret = lit +KW_OutWhen = lit +KW_PChar = lit +KW_PostPop = lit +KW_PrePush = lit +KW_Range = lit +KW_Ret = lit +KW_TargState = lit +KW_To = lit +KW_Variable = lit +KW_When = lit +KW_Write = lit +Parser6_tk_eof = lit +RE_Char = lit +RE_Dash = lit +RE_Dot = lit +RE_Slash = lit +RE_SqClose = lit +RE_SqOpen = lit +RE_SqOpenNeg = lit +RE_Star = lit +TK_AllCond = lit +TK_AllEOF = lit +TK_AllFromState = lit +TK_AllGblError = lit +TK_AllLocalError = lit +TK_AllToState = lit +TK_Arrow = lit +TK_BarEquals = lit +TK_BarStar = lit +TK_CloseColon = lit +TK_ColonCondOpen = lit +TK_ColonCondPlusOpen = lit +TK_ColonCondStarOpen = lit +TK_ColonEquals = lit +TK_ColonGt = lit +TK_ColonGtGt = lit +TK_ColonNfaOpen = lit +TK_ColonNoMaxOpen = lit +TK_DashDash = lit +TK_DotDot = lit +TK_DotDotIndep = lit +TK_DoubleArrow = lit +TK_EndSection = lit +TK_FinalEOF = lit +TK_FinalFromState = lit +TK_FinalGblError = lit +TK_FinalLocalError = lit +TK_FinalToState = lit +TK_Hex = lit +TK_LeavingCond = lit +TK_Literal = lit +TK_LtColon = lit +TK_Middle = lit +TK_MiddleEOF = lit +TK_MiddleFromState = lit +TK_MiddleGblError = lit +TK_MiddleLocalError = lit +TK_MiddleToState = lit +TK_NameSep = lit +TK_NotFinalEOF = lit +TK_NotFinalFromState = lit +TK_NotFinalGblError = lit +TK_NotFinalLocalError = lit +TK_NotFinalToState = lit +TK_NotStartEOF = lit +TK_NotStartFromState = lit +TK_NotStartGblError = lit +TK_NotStartLocalError = lit +TK_NotStartToState = lit +TK_StarStar = lit +TK_StartCond = lit +TK_StartEOF = lit +TK_StartFromState = lit +TK_StartGblError = lit +TK_StartLocalError = lit +TK_StartToState = lit +TK_SubstRef = lit +TK_UInt = lit +TK_Word = lit +alnum = builtin +alpha = builtin +any = builtin +ascii = builtin +cntrl = builtin +digit = builtin +empty = builtin +everything_else = ^( ref | ref | ref | ref ) +extend = builtin +graph = builtin +import_stmt = ( ref . ref . lit ) +include_names = ( ref . ( ref )? | ref ) +include_stmt = ( ref . ref . lit ) +lower = builtin +machine_stmt = ( ref . ref . lit ) +main = ( ref | ref | ref | ref | ref )* +null = builtin +print = builtin +punct = builtin +space = builtin +upper = builtin +write_stmt = ( ref . ( ref )+ . lit ) +xdigit = builtin +zlen = builtin +machine name: rlscan +EOF = lit +NL = lit +alnum = builtin +alpha = builtin +any = builtin +ascii = builtin +c_comment = lit . ( ref | ref )* :>> lit +c_cpp_comment = ref | ref +cntrl = builtin +cpp_comment = lit . or_expr* . ref +d_literal = lit . ( or_expr | ref | lit . ( ref | ref ) )* . lit +digit = builtin +empty = builtin +extend = builtin +graph = builtin +hex_number = lit . or_expr+ +host_re_literal = lit . ( or_expr | ref | lit . ( ref | ref ) )* . lit +ident = ( ref | lit ) . 
( ref | ref | lit )* +inline_code = |* lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; ref; ref; ref; ( ref | ref ); ref+; ref; lit; lit; lit . or_expr . or_expr*; or_expr; or_expr; lit; lit; ref; ref; *| +lower = builtin +main = |* lit; ref; ref; ref; ( ref | ref ); lit; lit; ref+; ref; ref; *| +null = builtin +number = ref+ +ocaml_ident = ( ref | lit ) . ( ref | ref | lit )* . lit? +or_literal = |* lit; lit; lit; lit; lit; lit; lit; lit; lit; lit . ref; lit; lit; ref; or_expr; *| +parser_def = |* lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; ref; ref; ref; ( ref | ref ) . or_expr?; lit; lit; lit; ref; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; lit; or_expr+; ref; lit; ref; ref; *| +pound_comment = lit . or_expr* . ref +print = builtin +punct = builtin +ragel_re_literal = |* lit; lit; lit; lit; lit; lit; lit; lit; lit; lit . ref; lit . or_expr?; lit; lit; lit; lit; ref; or_expr; *| +ruby_comment = lit . or_expr* . ref +s_literal = lit . ( or_expr | ref | lit . ( ref | ref ) )* . lit +space = builtin +upper = builtin +whitespace = or_expr | ref +write_statement = |* ref; or_expr+; lit; ref; *| +xdigit = builtin +zlen = builtin diff --git a/test/rlparse.d/case/rlscan.rl b/test/rlparse.d/case/rlscan.rl new file mode 100644 index 00000000..c28a74ad --- /dev/null +++ b/test/rlparse.d/case/rlscan.rl @@ -0,0 +1,1192 @@ +/* + * Copyright 2006-2007 Adrian Thurston <thurston@complang.org> + * Copyright 2011 Josef Goettgens + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <iostream> +#include <fstream> +#include <string.h> + +#include "ragel.h" +#include "rlscan.h" +#include "inputdata.h" + +//#define LOG_TOKENS + +using std::ifstream; +using std::istream; +using std::ostream; +using std::endl; + +enum InlineBlockType +{ + CurlyDelimited, + SemiTerminated +}; + +char *newTokdata( int toklen ) +{ + char *tokdata = new char[sizeof(TokHead) + toklen + 1]; + return tokdata + sizeof(TokHead); +} + +void deleteTokdata( char *tokdata ) +{ + if ( tokdata ) + delete[] ( tokdata - sizeof(TokHead) ); +} + +void linkTokdata( Parser6 *parser, char *tokdata ) +{ + TokHead *head = (TokHead*)( tokdata - sizeof(TokHead) ); + head->next = parser->tokHead; + parser->tokHead = head; +} + +void clearTokdata( Parser6 *parser ) +{ + while ( parser->tokHead != 0 ) { + TokHead *next = parser->tokHead->next; + delete[] (char*)parser->tokHead; + parser->tokHead = next; + } +} + +/* + * The Scanner for Importing + */ + +%%{ + machine inline_token_scan; + alphtype int; + access tok_; + + # Import scanner tokens. 
+ import "rlparse.h"; + + main := |* + # Define of number. + IMP_Define IMP_Word IMP_UInt => { + int base = tok_ts - token_data; + int nameOff = 1; + int numOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_UInt, + token_strings[base+numOff], token_lens[base+numOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }; + + # Assignment of number. + IMP_Word '=' IMP_UInt => { + int base = tok_ts - token_data; + int nameOff = 0; + int numOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_UInt, + token_strings[base+numOff], token_lens[base+numOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }; + + # Define of literal. + IMP_Define IMP_Word IMP_Literal => { + int base = tok_ts - token_data; + int nameOff = 1; + int litOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_Literal, + token_strings[base+litOff], token_lens[base+litOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }; + + # Assignment of literal. + IMP_Word '=' IMP_Literal => { + int base = tok_ts - token_data; + int nameOff = 0; + int litOff = 2; + + directToParser( inclToParser, fileName, line, column, TK_Word, + token_strings[base+nameOff], token_lens[base+nameOff] ); + directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); + directToParser( inclToParser, fileName, line, column, TK_Literal, + token_strings[base+litOff], token_lens[base+litOff] ); + directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); + }; + + # Catch everything else. 
+ any; + *|; +}%% + +%% write data; + +void Scanner::flushImport() +{ + int *p = token_data; + int *pe = token_data + cur_token; + int *eof = 0; + + %%{ + machine inline_token_scan; + write init; + write exec; + }%% + + if ( tok_ts == 0 ) + cur_token = 0; + else { + cur_token = pe - tok_ts; + int ts_offset = tok_ts - token_data; + memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) ); + memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) ); + memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) ); + } +} + +void Scanner::directToParser( Parser6 *toParser, const char *tokFileName, int tokLine, + int tokColumn, int type, char *tokdata, int toklen ) +{ + InputLoc loc; + + #ifdef LOG_TOKENS + cerr << "scanner:" << tokLine << ":" << tokColumn << + ": sending token to the parser " << Parser6_lelNames[type]; + cerr << " " << toklen; + if ( tokdata != 0 ) + cerr << " " << tokdata; + cerr << endl; + #endif + + loc.fileName = tokFileName; + loc.line = tokLine; + loc.col = tokColumn; + + toParser->token( loc, type, tokdata, toklen ); +} + +void Scanner::importToken( int token, char *start, char *end ) +{ + if ( cur_token == max_tokens ) + flushImport(); + + token_data[cur_token] = token; + if ( start == 0 ) { + token_strings[cur_token] = 0; + token_lens[cur_token] = 0; + } + else { + int toklen = end-start; + token_lens[cur_token] = toklen; + token_strings[cur_token] = new char[toklen+1]; + memcpy( token_strings[cur_token], start, toklen ); + token_strings[cur_token][toklen] = 0; + } + cur_token++; +} + +void Scanner::pass() +{ + if ( sectionPass ) + return; + + updateCol(); + + /* If no errors and we are at the bottom of the include stack (the + * source file listed on the command line) then write out the data. */ + if ( includeDepth == 0 && id->machineSpec == 0 && id->machineName == 0 ) + id->curItem->data.write( ts, te-ts ); +} + +void Scanner::pass( int token, char *start, char *end ) +{ + if ( sectionPass ) + return; + + if ( importMachines ) + importToken( token, start, end ); + + pass(); +} + +/* + * The scanner for processing sections, includes, imports, etc. + */ + +%%{ + machine section_parse; + alphtype int; + write data; +}%% + +void Scanner::init( ) +{ + %% write init; +} + +bool Scanner::active() +{ + if ( ignoreSection ) + return false; + + if ( parser == 0 && ! parserExistsError ) { + id->error(scan_loc()) << "this specification has no name, nor does any previous" + " specification" << endl; + parserExistsError = true; + } + + if ( parser == 0 ) + return false; + + return true; +} + +InputLoc Scanner::scan_loc() +{ + return makeInputLoc( fileName, line, column ); +} + +void Scanner::updateCol() +{ + char *from = lastnl; + if ( from == 0 ) + from = ts; + column += te - from; + lastnl = 0; +} + +void Scanner::handleMachine() +{ + if ( sectionPass ) { + /* Assign a name to the machine. */ + char *machine = word; + + SectionDictEl *sdEl = id->sectionDict.find( machine ); + if ( sdEl == 0 ) { + sdEl = new SectionDictEl( machine ); + sdEl->value = new Section( machine ); + id->sectionDict.insert( sdEl ); + } + + section = sdEl->value; + } + else { + + /* Assign a name to the machine. 
*/ + char *machine = word; + + if ( !importMachines && inclSectionTarg == 0 ) { + ignoreSection = false; + + ParserDictEl *pdEl = id->parserDict.find( machine ); + if ( pdEl == 0 ) { + pdEl = new ParserDictEl( machine ); + pdEl->value = new Parser6( id, fileName, machine, sectionLoc, + id->hostLang, id->minimizeLevel, id->minimizeOpt ); + pdEl->value->init(); + id->parserDict.insert( pdEl ); + id->parserList.append( pdEl->value ); + + /* Also into the parse data dict. This is the new style. */ + ParseDataDictEl *pddEl = new ParseDataDictEl( machine ); + pddEl->value = pdEl->value->pd; + id->parseDataDict.insert( pddEl ); + id->parseDataList.append( pddEl->value ); + } + + parser = pdEl->value; + } + else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) { + /* found include target */ + ignoreSection = false; + parser = inclToParser; + } + else { + /* ignoring section */ + ignoreSection = true; + parser = 0; + } + } +} + +void Scanner::handleInclude() +{ + if ( sectionPass ) + return; + + if ( active() ) { + char *inclSectionName = word; + const char **includeChecks = 0; + + /* Implement defaults for the input file and section name. */ + if ( inclSectionName == 0 ) + inclSectionName = parser->sectionName; + + if ( lit != 0 ) { + long length = 0; + bool caseInsensitive = false; + char *data = prepareLitString( id, InputLoc(), lit, lit_len, length, caseInsensitive ); + + includeChecks = parser->pd->id->makeIncludePathChecks( fileName, data ); + } + else { + char *test = new char[strlen(fileName)+1]; + strcpy( test, fileName ); + + includeChecks = new const char*[2]; + + includeChecks[0] = test; + includeChecks[1] = 0; + } + + long found = 0; + ifstream *inFile = parser->pd->id->tryOpenInclude( includeChecks, found ); + if ( inFile == 0 ) { + id->error(scan_loc()) << "include: failed to locate file" << endl; + const char **tried = includeChecks; + while ( *tried != 0 ) + id->error(scan_loc()) << "include: attempted: \"" << *tried++ << '\"' << endl; + } + else { + /* Don't include anything that's already been included. */ + if ( !parser->pd->duplicateInclude( includeChecks[found], inclSectionName ) ) { + parser->pd->includeHistory.push_back( IncludeHistoryItem( + includeChecks[found], inclSectionName ) ); + + Scanner scanner( id, includeChecks[found], *inFile, parser, + inclSectionName, includeDepth+1, false ); + scanner.do_scan( ); + } + + delete inFile; + } + } +} + +void Scanner::handleImport() +{ + if ( sectionPass ) + return; + + if ( active() ) { + long length = 0; + bool caseInsensitive = false; + char *data = prepareLitString( id, InputLoc(), lit, lit_len, length, caseInsensitive ); + + const char **importChecks = parser->pd->id->makeIncludePathChecks( fileName, data ); + + /* Open the input file for reading. */ + long found = 0; + ifstream *inFile = parser->pd->id->tryOpenInclude( importChecks, found ); + if ( inFile == 0 ) { + id->error(scan_loc()) << "import: could not open import file " << + "for reading" << endl; + const char **tried = importChecks; + while ( *tried != 0 ) + id->error(scan_loc()) << "import: attempted: \"" << *tried++ << '\"' << endl; + } + + Scanner scanner( id, importChecks[found], *inFile, parser, + 0, includeDepth+1, true ); + scanner.do_scan( ); + scanner.importToken( 0, 0, 0 ); + scanner.flushImport(); + delete inFile; + } +} + +%%{ + machine section_parse; + + # Need the defines representing tokens. 
+ import "rlparse.h"; + + action clear_words { word = lit = 0; word_len = lit_len = 0; } + action store_word { word = tokdata; word_len = toklen; } + action store_lit { lit = tokdata; lit_len = toklen; } + + action mach_err { id->error(scan_loc()) << "bad machine statement" << endl; } + action incl_err { id->error(scan_loc()) << "bad include statement" << endl; } + action import_err { id->error(scan_loc()) << "bad import statement" << endl; } + action write_err { id->error(scan_loc()) << "bad write statement" << endl; } + + action handle_machine { handleMachine(); } + action handle_include { handleInclude(); } + action handle_import { handleImport(); } + + machine_stmt = + ( KW_Machine TK_Word @store_word ';' ) @handle_machine + <>err mach_err <>eof mach_err; + + include_names = ( + TK_Word @store_word ( TK_Literal @store_lit )? | + TK_Literal @store_lit + ) >clear_words; + + include_stmt = + ( KW_Include include_names ';' ) @handle_include + <>err incl_err <>eof incl_err; + + import_stmt = + ( KW_Import TK_Literal @store_lit ';' ) @handle_import + <>err import_err <>eof import_err; + + action write_command + { + if ( sectionPass ) { + InputItem *inputItem = new InputItem; + inputItem->type = InputItem::Write; + inputItem->loc.fileName = fileName; + inputItem->loc.line = line; + inputItem->loc.col = column; + inputItem->name = section->sectionName; + inputItem->section = section; + + /* Track the last reference. */ + inputItem->section->lastReference = inputItem; + + id->inputItems.append( inputItem ); + } + else { + if ( includeDepth == 0 && active() && + id->machineSpec == 0 && id->machineName == 0 ) + { + id->curItem = id->curItem->next; + id->curItem->pd = parser->pd; + id->curItem->parser = parser; + id->checkLastRef( id->curItem ); + } + } + } + + action write_arg + { + if ( sectionPass ) { + } + else { + if ( active() && id->machineSpec == 0 && id->machineName == 0 ) + id->curItem->writeArgs.push_back( strdup(tokdata) ); + } + } + + action write_close + { + if ( sectionPass ) { + } + else { + /* if ( active() && id->machineSpec == 0 && id->machineName == 0 ) + * id->curItem->writeArgs.append( 0 ); */ + } + } + + write_stmt = + ( KW_Write @write_command + ( TK_Word @write_arg )+ ';' @write_close ) + <>err write_err <>eof write_err; + + action handle_token + { + if ( sectionPass ) { + deleteTokdata( tokdata ); + } + else { + /* Send the token off to the parser. */ + if ( active() ) { + if ( tokdata != 0 ) { + linkTokdata( parser, tokdata ); + } + + directToParser( parser, fileName, line, column, type, tokdata, toklen ); + } + else { + deleteTokdata( tokdata ); + } + } + } + + # Catch everything else. 
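+	# The leading '^' is Ragel's character-level negation: over this int
+	# alphabet it matches any single token id other than the four
+	# section-level keywords, so every remaining token is forwarded to the
+	# parser by handle_token.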
+ everything_else = + ^( KW_Machine | KW_Include | KW_Import | KW_Write ) @handle_token; + + main := ( + machine_stmt | + include_stmt | + import_stmt | + write_stmt | + everything_else + )*; +}%% + +void Scanner::token( int type, char c ) +{ + token( type, &c, &c + 1 ); +} + +void Scanner::token( int type ) +{ + token( type, 0, 0 ); +} + + +void Scanner::token( int type, char *start, char *end ) +{ + char *tokdata = 0; + int toklen = 0; + if ( start != 0 ) { + toklen = end-start; + tokdata = newTokdata( toklen + 1 ); + memcpy( tokdata, start, toklen ); + tokdata[toklen] = 0; + } + + processToken( type, tokdata, toklen ); +} + +void Scanner::processToken( int type, char *tokdata, int toklen ) +{ + int *p, *pe, *eof; + + if ( type < 0 ) + p = pe = eof = 0; + else { + p = &type; + pe = &type + 1; + eof = 0; + } + + %%{ + machine section_parse; + write exec; + }%% + + updateCol(); + + /* Record the last token for use in controlling the scan of subsequent + * tokens. */ + lastToken = type; +} + +void Scanner::startSection( ) +{ + parserExistsError = false; + + sectionLoc.fileName = fileName; + sectionLoc.line = line; + sectionLoc.col = column; +} + +void Scanner::endSection( ) +{ + /* Execute the eof actions for the section parser. */ + processToken( -1, 0, 0 ); + + if ( sectionPass ) { + InputItem *inputItem = new InputItem; + inputItem->type = InputItem::EndSection; + inputItem->loc.fileName = fileName; + inputItem->loc.line = line; + inputItem->loc.col = column; + id->inputItems.append( inputItem ); + if ( section != 0 ) { + inputItem->section = section; + section->lastReference = inputItem; + } + + if ( includeDepth == 0 ) { + if ( id->machineSpec == 0 && id->machineName == 0 ) { + /* The end section may include a newline on the end, so + * we use the last line, which will count the newline. */ + InputItem *inputItem = new InputItem; + inputItem->type = InputItem::HostData; + inputItem->loc.fileName = fileName; + inputItem->loc.line = line; + inputItem->loc.col = column; + id->inputItems.append( inputItem ); + } + } + } + else { + /* Close off the section with the parser. */ + if ( includeDepth == 0 && active() ) { + InputLoc loc; + loc.fileName = fileName; + loc.line = line; + loc.col = column; + + parser->token( loc, TK_EndSection, 0, 0 ); + + id->curItem = id->curItem->next; + + if ( parser != 0 ) { + id->curItem->pd = parser->pd; + id->curItem->parser = parser; + } + + id->checkLastRef( id->curItem ); + } + + if ( includeDepth == 0 ) { + if ( id->machineSpec == 0 && id->machineName == 0 ) { + id->curItem = id->curItem->next; + id->checkLastRef( id->curItem ); + } + } + } +} + +%%{ + machine rlscan; + + # This is sent by the driver code. + EOF = 0; + + action inc_nl { + lastnl = p; + column = 0; + line++; + } + NL = '\n' @inc_nl; + + # Identifiers, numbers, commetns, and other common things. + ident = ( alpha | '_' ) ( alpha |digit |'_' )*; + ocaml_ident = ( alpha | '_' ) ( alpha |digit |'_' )* "'"?; + number = digit+; + hex_number = '0x' [0-9a-fA-F]+; + + c_comment = + '/*' ( any | NL )* :>> '*/'; + + cpp_comment = + '//' [^\n]* NL; + + c_cpp_comment = c_comment | cpp_comment; + + ruby_comment = '#' [^\n]* NL; + + # These literal forms are common to host code and ragel. + s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'"; + d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"'; + host_re_literal = '/' ([^/\\] | NL | '\\' (any | NL))* '/'; + + whitespace = [ \t] | NL; + pound_comment = '#' [^\n]* NL; + + # An inline block of code for languages other than Ruby. 
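+	# The |* ... *| construction instantiates a longest-match scanner: on
+	# each pass it matches the longest pattern and leaves the token bounds
+	# in ts and te for the actions below.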
+ inline_code := |* + # Inline expression keywords. + "fpc" => { token( KW_PChar ); }; + "fc" => { token( KW_Char ); }; + "fcurs" => { token( KW_CurState ); }; + "ftargs" => { token( KW_TargState ); }; + "fentry" => { + whitespaceOn = false; + token( KW_Entry ); + }; + + # Inline statement keywords. + "fhold" => { + whitespaceOn = false; + token( KW_Hold ); + }; + "fexec" => { token( KW_Exec, 0, 0 ); }; + "fgoto" => { + whitespaceOn = false; + token( KW_Goto ); + }; + "fnext" => { + whitespaceOn = false; + token( KW_Next ); + }; + "fcall" => { + whitespaceOn = false; + token( KW_Call ); + }; + "fret" => { + whitespaceOn = false; + token( KW_Ret ); + }; + "fbreak" => { + whitespaceOn = false; + token( KW_Break ); + }; + "fncall" => { + whitespaceOn = false; + token( KW_Ncall ); + }; + "fnret" => { + whitespaceOn = false; + token( KW_Nret ); + }; + "fnbreak" => { + whitespaceOn = false; + token( KW_Nbreak ); + }; + + ident => { token( TK_Word, ts, te ); }; + + number => { token( TK_UInt, ts, te ); }; + hex_number => { token( TK_Hex, ts, te ); }; + + ( s_literal | d_literal ) + => { token( IL_Literal, ts, te ); }; + + whitespace+ => { + if ( whitespaceOn ) + token( IL_WhiteSpace, ts, te ); + }; + + c_cpp_comment => { token( IL_Comment, ts, te ); }; + + "::" => { token( TK_NameSep, ts, te ); }; + + # Some symbols need to go to the parser as with their cardinal value as + # the token type (as opposed to being sent as anonymous symbols) + # because they are part of the sequences which we interpret. The * ) ; + # symbols cause whitespace parsing to come back on. This gets turned + # off by some keywords. + + ";" => { + whitespaceOn = true; + token( *ts, ts, te ); + if ( inlineBlockType == SemiTerminated ) + fret; + }; + + "$" [a-zA-Z_][a-zA-Z_0-9]* => { + if ( parser != 0 && parser->parseSubstitutions ) + token( TK_SubstRef, ts+1, te ); + else { + token( IL_Symbol, ts, ts+1 ); + fexec ts+1; + } + }; + + [*)] => { + whitespaceOn = true; + token( *ts, ts, te ); + }; + + [,(] => { token( *ts, ts, te ); }; + + '{' => { + token( IL_Symbol, ts, te ); + curly_count += 1; + }; + + '}' => { + if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) { + /* Inline code block ends. */ + token( '}' ); + fret; + } + else { + /* Either a semi terminated inline block or only the closing + * brace of some inner scope, not the block's closing brace. */ + token( IL_Symbol, ts, te ); + } + }; + + EOF => { + id->error(scan_loc()) << "unterminated code block" << endl; + }; + + # Send every other character as a symbol. + any => { token( IL_Symbol, ts, te ); }; + *|; + + or_literal := |* + # Escape sequences in OR expressions. + '\\0' => { token( RE_Char, '\0' ); }; + '\\a' => { token( RE_Char, '\a' ); }; + '\\b' => { token( RE_Char, '\b' ); }; + '\\t' => { token( RE_Char, '\t' ); }; + '\\n' => { token( RE_Char, '\n' ); }; + '\\v' => { token( RE_Char, '\v' ); }; + '\\f' => { token( RE_Char, '\f' ); }; + '\\r' => { token( RE_Char, '\r' ); }; + '\\\n' => { updateCol(); }; + '\\' any => { token( RE_Char, ts+1, te ); }; + + # Range dash in an OR expression. + '-' => { token( RE_Dash, 0, 0 ); }; + + # Terminate an OR expression. + ']' => { token( RE_SqClose ); fret; }; + + EOF => { + id->error(scan_loc()) << "unterminated OR literal" << endl; + }; + + # Characters in an OR expression. + [^\]] => { token( RE_Char, ts, te ); }; + + *|; + + ragel_re_literal := |* + # Escape sequences in regular expressions. 
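+		# Each escape is decoded here and handed to the single-character
+		# token( int, char ) overload, so the parser sees the literal byte
+		# value rather than the backslash sequence.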
+ '\\0' => { token( RE_Char, '\0' ); }; + '\\a' => { token( RE_Char, '\a' ); }; + '\\b' => { token( RE_Char, '\b' ); }; + '\\t' => { token( RE_Char, '\t' ); }; + '\\n' => { token( RE_Char, '\n' ); }; + '\\v' => { token( RE_Char, '\v' ); }; + '\\f' => { token( RE_Char, '\f' ); }; + '\\r' => { token( RE_Char, '\r' ); }; + '\\\n' => { updateCol(); }; + '\\' any => { token( RE_Char, ts+1, te ); }; + + # Terminate an OR expression. + '/' [i]? => { + token( RE_Slash, ts, te ); + fgoto parser_def; + }; + + # Special characters. + '.' => { token( RE_Dot ); }; + '*' => { token( RE_Star ); }; + + '[' => { token( RE_SqOpen ); fcall or_literal; }; + '[^' => { token( RE_SqOpenNeg ); fcall or_literal; }; + + EOF => { + id->error(scan_loc()) << "unterminated regular expression" << endl; + }; + + # Characters in an OR expression. + [^\/] => { token( RE_Char, ts, te ); }; + *|; + + # We need a separate token space here to avoid the ragel keywords. + write_statement := |* + ident => { token( TK_Word, ts, te ); } ; + [ \t\n]+ => { updateCol(); }; + ';' => { token( ';' ); fgoto parser_def; }; + + EOF => { + id->error(scan_loc()) << "unterminated write statement" << endl; + }; + *|; + + # Parser definitions. + parser_def := |* + #'length_cond' => { token( KW_Length ); }; + 'machine' => { token( KW_Machine ); }; + 'include' => { token( KW_Include ); }; + 'import' => { token( KW_Import ); }; + 'write' => { + token( KW_Write ); + fgoto write_statement; + }; + 'action' => { token( KW_Action ); }; + 'alphtype' => { token( KW_AlphType ); }; + 'prepush' => { token( KW_PrePush ); }; + 'postpop' => { token( KW_PostPop ); }; + + 'nfaprepush' => { token( KW_NfaPrePush ); }; + 'nfapostpop' => { token( KW_NfaPostPop ); }; + + # FIXME: Enable this post 5.17. + # 'range' => { token( KW_Range ); }; + + 'getkey' => { + token( KW_GetKey ); + inlineBlockType = SemiTerminated; + fcall inline_code; + }; + 'access' => { + token( KW_Access ); + inlineBlockType = SemiTerminated; + fcall inline_code; + }; + 'variable' => { + token( KW_Variable ); + inlineBlockType = SemiTerminated; + fcall inline_code; + }; + 'when' => { token( KW_When ); }; + 'inwhen' => { token( KW_InWhen ); }; + 'outwhen' => { token( KW_OutWhen ); }; + 'eof' => { token( KW_Eof ); }; + 'err' => { token( KW_Err ); }; + 'lerr' => { token( KW_Lerr ); }; + 'to' => { token( KW_To ); }; + 'from' => { token( KW_From ); }; + 'export' => { token( KW_Export ); }; + + # Identifiers. + ident => { token( TK_Word, ts, te ); } ; + + # Numbers + number => { token( TK_UInt, ts, te ); }; + hex_number => { token( TK_Hex, ts, te ); }; + + # Literals, with optionals. + ( s_literal | d_literal ) [i]? + => { token( TK_Literal, ts, te ); }; + + '[' => { token( RE_SqOpen ); fcall or_literal; }; + '[^' => { token( RE_SqOpenNeg ); fcall or_literal; }; + + '/' => { token( RE_Slash ); fgoto ragel_re_literal; }; + + # Ignore. + pound_comment => { updateCol(); }; + + ':=' => { token( TK_ColonEquals ); }; + '|=' => { token( TK_BarEquals ); }; + + # To State Actions. 
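+		# The embedding-operator tokens that follow pair a state-selection
+		# prefix (> start, $ all, % final, < not-start, @ not-final,
+		# <> middle) with a suffix choosing the embedding type: ~ to-state,
+		# * from-state, / EOF, ! global error, ^ local error.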
+ ">~" => { token( TK_StartToState ); }; + "$~" => { token( TK_AllToState ); }; + "%~" => { token( TK_FinalToState ); }; + "<~" => { token( TK_NotStartToState ); }; + "@~" => { token( TK_NotFinalToState ); }; + "<>~" => { token( TK_MiddleToState ); }; + + # From State actions + ">*" => { token( TK_StartFromState ); }; + "$*" => { token( TK_AllFromState ); }; + "%*" => { token( TK_FinalFromState ); }; + "<*" => { token( TK_NotStartFromState ); }; + "@*" => { token( TK_NotFinalFromState ); }; + "<>*" => { token( TK_MiddleFromState ); }; + + # EOF Actions. + ">/" => { token( TK_StartEOF ); }; + "$/" => { token( TK_AllEOF ); }; + "%/" => { token( TK_FinalEOF ); }; + "</" => { token( TK_NotStartEOF ); }; + "@/" => { token( TK_NotFinalEOF ); }; + "<>/" => { token( TK_MiddleEOF ); }; + + # Global Error actions. + ">!" => { token( TK_StartGblError ); }; + "$!" => { token( TK_AllGblError ); }; + "%!" => { token( TK_FinalGblError ); }; + "<!" => { token( TK_NotStartGblError ); }; + "@!" => { token( TK_NotFinalGblError ); }; + "<>!" => { token( TK_MiddleGblError ); }; + + # Local error actions. + ">^" => { token( TK_StartLocalError ); }; + "$^" => { token( TK_AllLocalError ); }; + "%^" => { token( TK_FinalLocalError ); }; + "<^" => { token( TK_NotStartLocalError ); }; + "@^" => { token( TK_NotFinalLocalError ); }; + "<>^" => { token( TK_MiddleLocalError ); }; + + # Middle. + "<>" => { token( TK_Middle ); }; + + # Conditions. + '>?' => { token( TK_StartCond ); }; + '$?' => { token( TK_AllCond ); }; + '%?' => { token( TK_LeavingCond ); }; + + '..' => { token( TK_DotDot ); }; + '../i' => { token( TK_DotDotIndep ); }; + + '**' => { token( TK_StarStar ); }; + '--' => { token( TK_DashDash ); }; + '->' => { token( TK_Arrow ); }; + '=>' => { token( TK_DoubleArrow ); }; + + ":>" => { token( TK_ColonGt ); }; + ":>>" => { token( TK_ColonGtGt ); }; + "<:" => { token( TK_LtColon ); }; + + ":nfa(" => { token( TK_ColonNfaOpen ); }; + ":cond(" => { token( TK_ColonCondOpen ); }; + ":condstar(" => { token( TK_ColonCondStarOpen ); }; + ":condplus(" => { token( TK_ColonCondPlusOpen ); }; + ":nomax(" => { token( TK_ColonNoMaxOpen ); }; + "):" => { token( TK_CloseColon ); }; + + # Opening of longest match. + "|*" => { token( TK_BarStar ); }; + + # Separater for name references. + "::" => { token( TK_NameSep, ts, te ); }; + + '}%%' => { + updateCol(); + endSection(); + fret; + }; + + [ \t\r]+ => { updateCol(); }; + + # If we are in a single line machine then newline may end the spec. + NL => { + updateCol(); + if ( singleLineSpec ) { + endSection(); + fret; + } + }; + + '{' => { + if ( lastToken == KW_Export || lastToken == KW_Entry ) + token( '{' ); + else { + token( '{' ); + curly_count = 1; + inlineBlockType = CurlyDelimited; + fcall inline_code; + } + }; + + EOF => { + id->error(scan_loc()) << "unterminated ragel section" << endl; + }; + + any => { token( *ts ); } ; + *|; + + # Outside code scanner. These tokens get passed through. 
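+	# main runs over host code outside ragel sections. Tokens are echoed
+	# with pass() (and buffered by importToken when importing machines);
+	# '%%{' and '%%' switch into parser_def with fcall, the latter marking
+	# a single-line spec that a newline will terminate.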
+ main := |* + 'define' => { pass( IMP_Define, 0, 0 ); }; + ident => { pass( IMP_Word, ts, te ); }; + number => { pass( IMP_UInt, ts, te ); }; + c_cpp_comment => { pass(); }; + ( s_literal | d_literal ) => { pass( IMP_Literal, ts, te ); }; + + '%%{' => { + updateCol(); + singleLineSpec = false; + startSection(); + fcall parser_def; + }; + '%%' => { + updateCol(); + singleLineSpec = true; + startSection(); + fcall parser_def; + }; + whitespace+ => { pass(); }; + EOF; + any => { pass( *ts, 0, 0 ); }; + *|; +}%% + +%% write data; + +void Scanner::do_scan() +{ + int bufsize = 8; + char *buf = new char[bufsize]; + int cs, act, have = 0; + int top; + + /* The stack is two deep, one level for going into ragel defs from the main + * machines which process outside code, and another for going into or literals + * from either a ragel spec, or a regular expression. */ + int stack[2]; + int curly_count = 0; + bool execute = true; + bool singleLineSpec = false; + InlineBlockType inlineBlockType = CurlyDelimited; + + line = 1; + column = 1; + lastnl = 0; + + /* Init the section parser and the character scanner. */ + init(); + %% write init; + + /* Set up the start state. FIXME: After 5.20 is released the nocs write + * init option should be used, the main machine eliminated and this statement moved + * above the write init. */ + cs = rlscan_en_main; + + while ( execute ) { + char *p = buf + have; + int space = bufsize - have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. Grow it. */ + bufsize = bufsize * 2; + char *newbuf = new char[bufsize]; + + /* Recompute p and space. */ + p = newbuf + have; + space = bufsize - have; + + /* Patch up pointers possibly in use. */ + if ( ts != 0 ) + ts = newbuf + ( ts - buf ); + te = newbuf + ( te - buf ); + + /* Copy the new buffer in. */ + memcpy( newbuf, buf, have ); + delete[] buf; + buf = newbuf; + } + + input.read( p, space ); + int len = input.gcount(); + char *pe = p + len; + + /* If we see eof then append the eof var. */ + char *eof = 0; + if ( len == 0 ) { + eof = pe; + execute = false; + } + + %% write exec; + + /* Check if we failed. */ + if ( cs == rlscan_error ) { + /* Machine failed before finding a token. I'm not yet sure if this + * is reachable. */ + id->error(scan_loc()) << "scanner error" << endl; + id->abortCompile( 1 ); + } + + /* Decide if we need to preserve anything. */ + char *preserve = ts; + + /* Now set up the prefix. */ + if ( preserve == 0 ) + have = 0; + else { + /* There is data that needs to be shifted over. */ + have = pe - preserve; + memmove( buf, preserve, have ); + unsigned int shiftback = preserve - buf; + if ( ts != 0 ) + ts -= shiftback; + te -= shiftback; + + preserve = buf; + } + } + + delete[] buf; +} |
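When do_scan fills the buffer while still scanning a token, it doubles the buffer and re-bases any live ts/te pointers onto the new storage before copying the unfinished prefix across. A minimal sketch of that grow-and-patch idiom follows; the names GrowBuf and grow are illustrative only and are not part of the code above.

	#include <cstring>

	struct GrowBuf
	{
		char *buf;
		int bufsize;

		/* Double the buffer, re-basing the token pointers so they keep
		 * referring to the same bytes, then copy the unfinished prefix
		 * of 'have' bytes into the new storage. */
		void grow( int have, char *&ts, char *&te )
		{
			char *newbuf = new char[bufsize * 2];

			/* Patch up pointers possibly in use (te is always live once
			 * a token has started, ts may be null between tokens). */
			if ( ts != 0 )
				ts = newbuf + ( ts - buf );
			te = newbuf + ( te - buf );

			memcpy( newbuf, buf, have );
			delete[] buf;
			buf = newbuf;
			bufsize *= 2;
		}
	};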