diff options
Diffstat (limited to 'ragel/rlscan.rl')
-rw-r--r-- | ragel/rlscan.rl | 1193 |
1 files changed, 0 insertions, 1193 deletions
/*
 * Copyright 2006-2007 Adrian Thurston <thurston@colm.net>
 * Copyright 2011 Josef Goettgens
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <iostream>
#include <fstream>
#include <string.h>

#include "ragel.h"
#include "rlscan.h"
#include "inputdata.h"

//#define LOG_TOKENS

using std::ifstream;
using std::istream;
using std::ostream;
using std::endl;

/* How the inline code block currently being scanned is terminated. */
enum InlineBlockType
{
	CurlyDelimited,
	SemiTerminated
};

/* Allocate a character buffer large enough for a TokHead followed by
 * toklen + 1 bytes. The returned pointer addresses the bytes just past the
 * TokHead, so the header sits immediately in front of the token data. */
char *newTokdata( int toklen )
{
	char *tokdata = new char[sizeof(TokHead) + toklen + 1];
	return tokdata + sizeof(TokHead);
}

/* Release a buffer obtained from newTokdata. The pointer is moved back over
 * the TokHead before deleting. A null pointer is ignored. */
void deleteTokdata( char *tokdata )
{
	if ( tokdata )
		delete[] ( tokdata - sizeof(TokHead) );
}

/* Push the TokHead that precedes tokdata onto the front of the list rooted at
 * parser->tokHead. */
void linkTokdata( Parser6 *parser, char *tokdata )
{
	TokHead *head = (TokHead*)( tokdata - sizeof(TokHead) );
	head->next = parser->tokHead;
	parser->tokHead = head;
}

/* Delete every buffer on the parser->tokHead list, leaving the list empty. */
void clearTokdata( Parser6 *parser )
{
	while ( parser->tokHead != 0 ) {
		TokHead *next = parser->tokHead->next;
		delete[] (char*)parser->tokHead;
		parser->tokHead = next;
	}
}

/*
 * The Scanner for Importing
 */

%%{
	machine inline_token_scan;
	alphtype int;
	access tok_;

	# Import scanner tokens.
	import "rlparse.h";

	main := |*
		# Define of number.
		IMP_Define IMP_Word IMP_UInt => {
			int base = tok_ts - token_data;
			int nameOff = 1;
			int numOff = 2;

			directToParser( inclToParser, fileName, line, column, TK_Word,
					token_strings[base+nameOff], token_lens[base+nameOff] );
			directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
			directToParser( inclToParser, fileName, line, column, TK_UInt,
					token_strings[base+numOff], token_lens[base+numOff] );
			directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
		};

		# Assignment of number.
		IMP_Word '=' IMP_UInt => {
			int base = tok_ts - token_data;
			int nameOff = 0;
			int numOff = 2;

			directToParser( inclToParser, fileName, line, column, TK_Word,
					token_strings[base+nameOff], token_lens[base+nameOff] );
			directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
			directToParser( inclToParser, fileName, line, column, TK_UInt,
					token_strings[base+numOff], token_lens[base+numOff] );
			directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
		};

		# Define of literal.
		IMP_Define IMP_Word IMP_Literal => {
			int base = tok_ts - token_data;
			int nameOff = 1;
			int litOff = 2;

			directToParser( inclToParser, fileName, line, column, TK_Word,
					token_strings[base+nameOff], token_lens[base+nameOff] );
			directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
			directToParser( inclToParser, fileName, line, column, TK_Literal,
					token_strings[base+litOff], token_lens[base+litOff] );
			directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
		};

		# Assignment of literal.
		IMP_Word '=' IMP_Literal => {
			int base = tok_ts - token_data;
			int nameOff = 0;
			int litOff = 2;

			directToParser( inclToParser, fileName, line, column, TK_Word,
					token_strings[base+nameOff], token_lens[base+nameOff] );
			directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
			directToParser( inclToParser, fileName, line, column, TK_Literal,
					token_strings[base+litOff], token_lens[base+litOff] );
			directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
		};

		# Catch everything else.
		any;
	*|;
}%%

%% write data;

/* Run the inline_token_scan machine over the buffered import tokens
 * (token_data[0 .. cur_token)). Afterwards, any tokens belonging to a match
 * still in progress (tok_ts set) are moved to the front of the three parallel
 * arrays and kept; otherwise the buffer is emptied. */
void Scanner::flushImport()
{
	int *p = token_data;
	int *pe = token_data + cur_token;
	int *eof = 0;

	%%{
		machine inline_token_scan;
		write init;
		write exec;
	}%%

	if ( tok_ts == 0 )
		cur_token = 0;
	else {
		cur_token = pe - tok_ts;
		int ts_offset = tok_ts - token_data;
		memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) );
		memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) );
		memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) );
	}
}

/* Build an InputLoc from the given file name, line and column and hand the
 * token to toParser->token(). When LOG_TOKENS is defined the token is also
 * logged to cerr. */
void Scanner::directToParser( Parser6 *toParser, const char *tokFileName, int tokLine,
		int tokColumn, int type, char *tokdata, int toklen )
{
	InputLoc loc;

	#ifdef LOG_TOKENS
	cerr << "scanner:" << tokLine << ":" << tokColumn <<
			": sending token to the parser " << Parser6_lelNames[type];
	cerr << " " << toklen;
	if ( tokdata != 0 )
		cerr << " " << tokdata;
	cerr << endl;
	#endif

	loc.fileName = tokFileName;
	loc.line = tokLine;
	loc.col = tokColumn;

	toParser->token( loc, type, tokdata, toklen );
}

/* Append a token to the import buffer, flushing first if the buffer is full.
 * When start is non-null the text [start, end) is copied into a newly
 * allocated, null-terminated string; otherwise the string slot is null and
 * the length is zero. */
void Scanner::importToken( int token, char *start, char *end )
{
	if ( cur_token == max_tokens )
		flushImport();

	token_data[cur_token] = token;
	if ( start == 0 ) {
		token_strings[cur_token] = 0;
		token_lens[cur_token] = 0;
	}
	else {
		int toklen = end-start;
		token_lens[cur_token] = toklen;
		token_strings[cur_token] = new char[toklen+1];
		memcpy( token_strings[cur_token], start, toklen );
		token_strings[cur_token][toklen] = 0;
	}
	cur_token++;
}

/* Pass the current host-code token (ts .. te) through to the output. Does
 * nothing during the section pass. */
void Scanner::pass()
{
	if ( sectionPass )
		return;

	updateCol();

	/* If no errors and we are at the bottom of the include stack (the
	 * source file listed on the command line) then write out the data. */
	if ( includeDepth == 0 && id->machineSpec == 0 && id->machineName == 0 )
		id->curItem->data.write( ts, te-ts );
}

/* As pass(), but when importing machines the token is first recorded in the
 * import buffer. */
void Scanner::pass( int token, char *start, char *end )
{
	if ( sectionPass )
		return;

	if ( importMachines )
		importToken( token, start, end );

	pass();
}

/*
 * The scanner for processing sections, includes, imports, etc.
 */

%%{
	machine section_parse;
	alphtype int;
	write data;
}%%

/* Initialize the section_parse machine state. */
void Scanner::init( )
{
	%% write init;
}

/* Report whether tokens of the current section should be processed: false if
 * the section is being ignored or there is no parser. The first time a
 * section is found to have no parser an error is emitted;
 * parserExistsError suppresses repeats until startSection() resets it. */
bool Scanner::active()
{
	if ( ignoreSection )
		return false;

	if ( parser == 0 && ! parserExistsError ) {
		id->error(scan_loc()) << "this specification has no name, nor does any previous"
			" specification" << endl;
		parserExistsError = true;
	}

	if ( parser == 0 )
		return false;

	return true;
}

/* The current scan position as an InputLoc. */
InputLoc Scanner::scan_loc()
{
	return makeInputLoc( fileName, line, column );
}

/* Advance column by the length of the current token, measured from the last
 * newline seen inside it (lastnl) if there was one, else from the token
 * start. Clears lastnl. */
void Scanner::updateCol()
{
	char *from = lastnl;
	if ( from == 0 )
		from = ts;
	column += te - from;
	lastnl = 0;
}

/* Handle a completed machine statement, with the machine name in word.
 * During the section pass this finds or creates the named Section. Otherwise
 * it selects the parser for the section: a parser looked up (or created) by
 * name, the include target's parser, or none when the section is ignored. */
void Scanner::handleMachine()
{
	if ( sectionPass ) {
		/* Assign a name to the machine. */
		char *machine = word;

		SectionDictEl *sdEl = id->sectionDict.find( machine );
		if ( sdEl == 0 ) {
			sdEl = new SectionDictEl( machine );
			sdEl->value = new Section( machine );
			id->sectionDict.insert( sdEl );
		}

		section = sdEl->value;
	}
	else {

		/* Assign a name to the machine. */
		char *machine = word;

		if ( !importMachines && inclSectionTarg == 0 ) {
			ignoreSection = false;

			ParserDictEl *pdEl = id->parserDict.find( machine );
			if ( pdEl == 0 ) {
				pdEl = new ParserDictEl( machine );
				pdEl->value = new Parser6( id, fileName, machine, sectionLoc,
						id->hostLang, id->minimizeLevel, id->minimizeOpt );
				pdEl->value->init();
				id->parserDict.insert( pdEl );
				id->parserList.append( pdEl->value );

				/* Also into the parse data dict. This is the new style. */
				ParseDataDictEl *pddEl = new ParseDataDictEl( machine );
				pddEl->value = pdEl->value->pd;
				id->parseDataDict.insert( pddEl );
				id->parseDataList.append( pddEl->value );
			}

			parser = pdEl->value;
		}
		else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) {
			/* found include target */
			ignoreSection = false;
			parser = inclToParser;
		}
		else {
			/* ignoring section */
			ignoreSection = true;
			parser = 0;
		}
	}
}

/* Handle a completed include statement. The section name comes from word
 * (defaulting to the current parser's section name) and the file from lit
 * (defaulting to the current file). If the file can be opened and this
 * file/section pair has not been included before, it is scanned by a nested
 * Scanner that feeds the current parser. */
void Scanner::handleInclude()
{
	if ( sectionPass )
		return;

	if ( active() ) {
		char *inclSectionName = word;
		const char **includeChecks = 0;

		/* Implement defaults for the input file and section name. */
		if ( inclSectionName == 0 )
			inclSectionName = parser->sectionName;

		if ( lit != 0 ) {
			long length = 0;
			bool caseInsensitive = false;
			char *data = prepareLitString( id, InputLoc(), lit, lit_len, length, caseInsensitive );

			includeChecks = parser->pd->id->makeIncludePathChecks( fileName, data );
		}
		else {
			char *test = new char[strlen(fileName)+1];
			strcpy( test, fileName );

			includeChecks = new const char*[2];

			includeChecks[0] = test;
			includeChecks[1] = 0;
		}

		long found = 0;
		ifstream *inFile = parser->pd->id->tryOpenInclude( includeChecks, found );
		if ( inFile == 0 ) {
			id->error(scan_loc()) << "include: failed to locate file" << endl;
			const char **tried = includeChecks;
			while ( *tried != 0 )
				id->error(scan_loc()) << "include: attempted: \"" << *tried++ << '\"' << endl;
		}
		else {
			/* Don't include anything that's already been included. */
			if ( !parser->pd->duplicateInclude( includeChecks[found], inclSectionName ) ) {
				parser->pd->includeHistory.push_back( IncludeHistoryItem(
						includeChecks[found], inclSectionName ) );

				Scanner scanner( id, includeChecks[found], *inFile, parser,
						inclSectionName, includeDepth+1, false );
				scanner.do_scan( );
			}

			delete inFile;
		}
	}
}

/* Handle a completed import statement, with the file name literal in lit.
 * The file is scanned by a nested Scanner in import mode and its buffered
 * import tokens are flushed to the current parser. If the file cannot be
 * opened, the paths that were tried are reported and nothing is scanned. */
void Scanner::handleImport()
{
	if ( sectionPass )
		return;

	if ( active() ) {
		long length = 0;
		bool caseInsensitive = false;
		char *data = prepareLitString( id, InputLoc(), lit, lit_len, length, caseInsensitive );

		const char **importChecks = parser->pd->id->makeIncludePathChecks( fileName, data );

		/* Open the input file for reading. */
		long found = 0;
		ifstream *inFile = parser->pd->id->tryOpenInclude( importChecks, found );
		if ( inFile == 0 ) {
			id->error(scan_loc()) << "import: could not open import file " <<
					"for reading" << endl;
			const char **tried = importChecks;
			while ( *tried != 0 )
				id->error(scan_loc()) << "import: attempted: \"" << *tried++ << '\"' << endl;
		}
		else {
			/* Only scan when the open succeeded. The nested scanner is
			 * constructed from *inFile, so it must not be reached with a
			 * null stream pointer. */
			Scanner scanner( id, importChecks[found], *inFile, parser,
					0, includeDepth+1, true );
			scanner.do_scan( );
			scanner.importToken( 0, 0, 0 );
			scanner.flushImport();
			delete inFile;
		}
	}
}

%%{
	machine section_parse;

	# Need the defines representing tokens.
	import "rlparse.h";

	action clear_words { word = lit = 0; word_len = lit_len = 0; }
	action store_word { word = tokdata; word_len = toklen; }
	action store_lit { lit = tokdata; lit_len = toklen; }

	action mach_err { id->error(scan_loc()) << "bad machine statement" << endl; }
	action incl_err { id->error(scan_loc()) << "bad include statement" << endl; }
	action import_err { id->error(scan_loc()) << "bad import statement" << endl; }
	action write_err { id->error(scan_loc()) << "bad write statement" << endl; }

	action handle_machine { handleMachine(); }
	action handle_include { handleInclude(); }
	action handle_import { handleImport(); }

	machine_stmt =
		( KW_Machine TK_Word @store_word ';' ) @handle_machine
		<>err mach_err <>eof mach_err;

	include_names = (
		TK_Word @store_word ( TK_Literal @store_lit )? |
		TK_Literal @store_lit
	) >clear_words;

	include_stmt =
		( KW_Include include_names ';' ) @handle_include
		<>err incl_err <>eof incl_err;

	import_stmt =
		( KW_Import TK_Literal @store_lit ';' ) @handle_import
		<>err import_err <>eof import_err;

	action write_command
	{
		if ( sectionPass ) {
			InputItem *inputItem = new InputItem;
			inputItem->type = InputItem::Write;
			inputItem->loc.fileName = fileName;
			inputItem->loc.line = line;
			inputItem->loc.col = column;
			inputItem->name = section->sectionName;
			inputItem->section = section;

			/* Track the last reference. */
			inputItem->section->lastReference = inputItem;

			id->inputItems.append( inputItem );
		}
		else {
			if ( includeDepth == 0 && active() &&
					id->machineSpec == 0 && id->machineName == 0 )
			{
				id->curItem = id->curItem->next;
				id->curItem->pd = parser->pd;
				id->curItem->parser = parser;
				id->checkLastRef( id->curItem );
			}
		}
	}

	action write_arg
	{
		if ( sectionPass ) {
		}
		else {
			if ( active() && id->machineSpec == 0 && id->machineName == 0 )
				id->curItem->writeArgs.push_back( strdup(tokdata) );
		}
	}

	action write_close
	{
		if ( sectionPass ) {
		}
		else {
			/* if ( active() && id->machineSpec == 0 && id->machineName == 0 )
			 *	id->curItem->writeArgs.append( 0 ); */
		}
	}

	write_stmt =
		( KW_Write @write_command
		( TK_Word @write_arg )+ ';' @write_close )
		<>err write_err <>eof write_err;

	action handle_token
	{
		if ( sectionPass ) {
			deleteTokdata( tokdata );
		}
		else {
			/* Send the token off to the parser. */
			if ( active() ) {
				if ( tokdata != 0 ) {
					linkTokdata( parser, tokdata );
				}

				directToParser( parser, fileName, line, column, type, tokdata, toklen );
			}
			else {
				deleteTokdata( tokdata );
			}
		}
	}

	# Catch everything else.
	everything_else =
		^( KW_Machine | KW_Include | KW_Import | KW_Write ) @handle_token;

	main := (
		machine_stmt |
		include_stmt |
		import_stmt |
		write_stmt |
		everything_else
	)*;
}%%

/* Send a token whose text is the single character c. */
void Scanner::token( int type, char c )
{
	token( type, &c, &c + 1 );
}

/* Send a token that carries no text. */
void Scanner::token( int type )
{
	token( type, 0, 0 );
}


/* Send a token whose text is [start, end). When start is non-null the text
 * is copied into a null-terminated buffer from newTokdata before being
 * passed to processToken. */
void Scanner::token( int type, char *start, char *end )
{
	char *tokdata = 0;
	int toklen = 0;
	if ( start != 0 ) {
		toklen = end-start;
		tokdata = newTokdata( toklen + 1 );
		memcpy( tokdata, start, toklen );
		tokdata[toklen] = 0;
	}

	processToken( type, tokdata, toklen );
}

/* Run the section_parse machine over a single token type. A negative type
 * runs the machine with p, pe and eof all null, which endSection() uses to
 * execute the eof actions. */
void Scanner::processToken( int type, char *tokdata, int toklen )
{
	int *p, *pe, *eof;

	if ( type < 0 )
		p = pe = eof = 0;
	else {
		p = &type;
		pe = &type + 1;
		eof = 0;
	}

	%%{
		machine section_parse;
		write exec;
	}%%

	updateCol();

	/* Record the last token for use in controlling the scan of subsequent
	 * tokens. */
	lastToken = type;
}

/* Begin a ragel section: reset the missing-parser error flag and record the
 * current position as the section location. */
void Scanner::startSection( )
{
	parserExistsError = false;

	sectionLoc.fileName = fileName;
	sectionLoc.line = line;
	sectionLoc.col = column;
}

/* Finish a ragel section. During the section pass this appends the
 * EndSection input item (and, at include depth zero with no machine
 * selected, a following HostData item). Otherwise it sends TK_EndSection to
 * the parser and advances id->curItem in step with those items. */
void Scanner::endSection( )
{
	/* Execute the eof actions for the section parser. */
	processToken( -1, 0, 0 );

	if ( sectionPass ) {
		InputItem *inputItem = new InputItem;
		inputItem->type = InputItem::EndSection;
		inputItem->loc.fileName = fileName;
		inputItem->loc.line = line;
		inputItem->loc.col = column;
		id->inputItems.append( inputItem );
		if ( section != 0 ) {
			inputItem->section = section;
			section->lastReference = inputItem;
		}

		if ( includeDepth == 0 ) {
			if ( id->machineSpec == 0 && id->machineName == 0 ) {
				/* The end section may include a newline on the end, so
				 * we use the last line, which will count the newline. */
				InputItem *inputItem = new InputItem;
				inputItem->type = InputItem::HostData;
				inputItem->loc.fileName = fileName;
				inputItem->loc.line = line;
				inputItem->loc.col = column;
				id->inputItems.append( inputItem );
			}
		}
	}
	else {
		/* Close off the section with the parser. */
		if ( includeDepth == 0 && active() ) {
			InputLoc loc;
			loc.fileName = fileName;
			loc.line = line;
			loc.col = column;

			parser->token( loc, TK_EndSection, 0, 0 );

			id->curItem = id->curItem->next;

			if ( parser != 0 ) {
				id->curItem->pd = parser->pd;
				id->curItem->parser = parser;
			}

			id->checkLastRef( id->curItem );
		}

		if ( includeDepth == 0 ) {
			if ( id->machineSpec == 0 && id->machineName == 0 ) {
				id->curItem = id->curItem->next;
				id->checkLastRef( id->curItem );
			}
		}
	}
}

%%{
	machine rlscan;

	# This is sent by the driver code.
	EOF = 0;

	action inc_nl {
		lastnl = p;
		column = 0;
		line++;
	}
	NL = '\n' @inc_nl;

	# Identifiers, numbers, comments, and other common things.
	ident = ( alpha | '_' ) ( alpha |digit |'_' )*;
	ocaml_ident = ( alpha | '_' ) ( alpha |digit |'_' )* "'"?;
	number = digit+;
	hex_number = '0x' [0-9a-fA-F]+;

	c_comment =
		'/*' ( any | NL )* :>> '*/';

	cpp_comment =
		'//' [^\n]* NL;

	c_cpp_comment = c_comment | cpp_comment;

	ruby_comment = '#' [^\n]* NL;

	# These literal forms are common to host code and ragel.
	s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'";
	d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"';
	host_re_literal = '/' ([^/\\] | NL | '\\' (any | NL))* '/';

	whitespace = [ \t] | NL;
	pound_comment = '#' [^\n]* NL;

	# An inline block of code for languages other than Ruby.
	inline_code := |*
		# Inline expression keywords.
		"fpc" => { token( KW_PChar ); };
		"fc" => { token( KW_Char ); };
		"fcurs" => { token( KW_CurState ); };
		"ftargs" => { token( KW_TargState ); };
		"fentry" => {
			whitespaceOn = false;
			token( KW_Entry );
		};

		# Inline statement keywords.
		"fhold" => {
			whitespaceOn = false;
			token( KW_Hold );
		};
		"fexec" => { token( KW_Exec, 0, 0 ); };
		"fgoto" => {
			whitespaceOn = false;
			token( KW_Goto );
		};
		"fnext" => {
			whitespaceOn = false;
			token( KW_Next );
		};
		"fcall" => {
			whitespaceOn = false;
			token( KW_Call );
		};
		"fret" => {
			whitespaceOn = false;
			token( KW_Ret );
		};
		"fbreak" => {
			whitespaceOn = false;
			token( KW_Break );
		};
		"fncall" => {
			whitespaceOn = false;
			token( KW_Ncall );
		};
		"fnret" => {
			whitespaceOn = false;
			token( KW_Nret );
		};
		"fnbreak" => {
			whitespaceOn = false;
			token( KW_Nbreak );
		};

		ident => { token( TK_Word, ts, te ); };

		number => { token( TK_UInt, ts, te ); };
		hex_number => { token( TK_Hex, ts, te ); };

		( s_literal | d_literal )
			=> { token( IL_Literal, ts, te ); };

		whitespace+ => {
			if ( whitespaceOn )
				token( IL_WhiteSpace, ts, te );
		};

		c_cpp_comment => { token( IL_Comment, ts, te ); };

		"::" => { token( TK_NameSep, ts, te ); };

		# Some symbols need to go to the parser as with their cardinal value as
		# the token type (as opposed to being sent as anonymous symbols)
		# because they are part of the sequences which we interpret. The * ) ;
		# symbols cause whitespace parsing to come back on. This gets turned
		# off by some keywords.

		";" => {
			whitespaceOn = true;
			token( *ts, ts, te );
			if ( inlineBlockType == SemiTerminated )
				fret;
		};

		"$" [a-zA-Z_][a-zA-Z_0-9]* => {
			if ( parser != 0 && parser->parseSubstitutions )
				token( TK_SubstRef, ts+1, te );
			else {
				token( IL_Symbol, ts, ts+1 );
				fexec ts+1;
			}
		};

		[*)] => {
			whitespaceOn = true;
			token( *ts, ts, te );
		};

		[,(] => { token( *ts, ts, te ); };

		'{' => {
			token( IL_Symbol, ts, te );
			curly_count += 1;
		};

		'}' => {
			if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
				/* Inline code block ends. */
				token( '}' );
				fret;
			}
			else {
				/* Either a semi terminated inline block or only the closing
				 * brace of some inner scope, not the block's closing brace. */
				token( IL_Symbol, ts, te );
			}
		};

		EOF => {
			id->error(scan_loc()) << "unterminated code block" << endl;
		};

		# Send every other character as a symbol.
		any => { token( IL_Symbol, ts, te ); };
	*|;

	or_literal := |*
		# Escape sequences in OR expressions.
		'\\0' => { token( RE_Char, '\0' ); };
		'\\a' => { token( RE_Char, '\a' ); };
		'\\b' => { token( RE_Char, '\b' ); };
		'\\t' => { token( RE_Char, '\t' ); };
		'\\n' => { token( RE_Char, '\n' ); };
		'\\v' => { token( RE_Char, '\v' ); };
		'\\f' => { token( RE_Char, '\f' ); };
		'\\r' => { token( RE_Char, '\r' ); };
		'\\\n' => { updateCol(); };
		'\\' any => { token( RE_Char, ts+1, te ); };

		# Range dash in an OR expression.
		'-' => { token( RE_Dash, 0, 0 ); };

		# Terminate an OR expression.
		']' => { token( RE_SqClose ); fret; };

		EOF => {
			id->error(scan_loc()) << "unterminated OR literal" << endl;
		};

		# Characters in an OR expression.
		[^\]] => { token( RE_Char, ts, te ); };

	*|;

	ragel_re_literal := |*
		# Escape sequences in regular expressions.
		'\\0' => { token( RE_Char, '\0' ); };
		'\\a' => { token( RE_Char, '\a' ); };
		'\\b' => { token( RE_Char, '\b' ); };
		'\\t' => { token( RE_Char, '\t' ); };
		'\\n' => { token( RE_Char, '\n' ); };
		'\\v' => { token( RE_Char, '\v' ); };
		'\\f' => { token( RE_Char, '\f' ); };
		'\\r' => { token( RE_Char, '\r' ); };
		'\\\n' => { updateCol(); };
		'\\' any => { token( RE_Char, ts+1, te ); };

		# Terminate a regular expression.
		'/' [i]? => {
			token( RE_Slash, ts, te );
			fgoto parser_def;
		};

		# Special characters.
		'.' => { token( RE_Dot ); };
		'*' => { token( RE_Star ); };

		'[' => { token( RE_SqOpen ); fcall or_literal; };
		'[^' => { token( RE_SqOpenNeg ); fcall or_literal; };

		EOF => {
			id->error(scan_loc()) << "unterminated regular expression" << endl;
		};

		# Characters in a regular expression.
		[^\/] => { token( RE_Char, ts, te ); };
	*|;

	# We need a separate token space here to avoid the ragel keywords.
	write_statement := |*
		ident => { token( TK_Word, ts, te ); } ;
		[ \t\n]+ => { updateCol(); };
		';' => { token( ';' ); fgoto parser_def; };

		EOF => {
			id->error(scan_loc()) << "unterminated write statement" << endl;
		};
	*|;

	# Parser definitions.
	parser_def := |*
		#'length_cond' => { token( KW_Length ); };
		'machine' => { token( KW_Machine ); };
		'include' => { token( KW_Include ); };
		'import' => { token( KW_Import ); };
		'write' => {
			token( KW_Write );
			fgoto write_statement;
		};
		'action' => { token( KW_Action ); };
		'alphtype' => { token( KW_AlphType ); };
		'prepush' => { token( KW_PrePush ); };
		'postpop' => { token( KW_PostPop ); };

		'nfaprepush' => { token( KW_NfaPrePush ); };
		'nfapostpop' => { token( KW_NfaPostPop ); };

		# FIXME: Enable this post 5.17.
		# 'range' => { token( KW_Range ); };

		'getkey' => {
			token( KW_GetKey );
			inlineBlockType = SemiTerminated;
			fcall inline_code;
		};
		'access' => {
			token( KW_Access );
			inlineBlockType = SemiTerminated;
			fcall inline_code;
		};
		'variable' => {
			token( KW_Variable );
			inlineBlockType = SemiTerminated;
			fcall inline_code;
		};
		'when' => { token( KW_When ); };
		'inwhen' => { token( KW_InWhen ); };
		'outwhen' => { token( KW_OutWhen ); };
		'eof' => { token( KW_Eof ); };
		'err' => { token( KW_Err ); };
		'lerr' => { token( KW_Lerr ); };
		'to' => { token( KW_To ); };
		'from' => { token( KW_From ); };
		'export' => { token( KW_Export ); };

		# Identifiers.
		ident => { token( TK_Word, ts, te ); } ;

		# Numbers
		number => { token( TK_UInt, ts, te ); };
		hex_number => { token( TK_Hex, ts, te ); };

		# Literals, with optionals.
		( s_literal | d_literal ) [i]?
			=> { token( TK_Literal, ts, te ); };

		'[' => { token( RE_SqOpen ); fcall or_literal; };
		'[^' => { token( RE_SqOpenNeg ); fcall or_literal; };

		'/' => { token( RE_Slash ); fgoto ragel_re_literal; };

		# Ignore.
		pound_comment => { updateCol(); };

		':=' => { token( TK_ColonEquals ); };
		'|=' => { token( TK_BarEquals ); };

		# To State Actions.
		">~" => { token( TK_StartToState ); };
		"$~" => { token( TK_AllToState ); };
		"%~" => { token( TK_FinalToState ); };
		"<~" => { token( TK_NotStartToState ); };
		"@~" => { token( TK_NotFinalToState ); };
		"<>~" => { token( TK_MiddleToState ); };

		# From State actions
		">*" => { token( TK_StartFromState ); };
		"$*" => { token( TK_AllFromState ); };
		"%*" => { token( TK_FinalFromState ); };
		"<*" => { token( TK_NotStartFromState ); };
		"@*" => { token( TK_NotFinalFromState ); };
		"<>*" => { token( TK_MiddleFromState ); };

		# EOF Actions.
		">/" => { token( TK_StartEOF ); };
		"$/" => { token( TK_AllEOF ); };
		"%/" => { token( TK_FinalEOF ); };
		"</" => { token( TK_NotStartEOF ); };
		"@/" => { token( TK_NotFinalEOF ); };
		"<>/" => { token( TK_MiddleEOF ); };

		# Global Error actions.
		">!" => { token( TK_StartGblError ); };
		"$!" => { token( TK_AllGblError ); };
		"%!" => { token( TK_FinalGblError ); };
		"<!" => { token( TK_NotStartGblError ); };
		"@!" => { token( TK_NotFinalGblError ); };
		"<>!" => { token( TK_MiddleGblError ); };

		# Local error actions.
		">^" => { token( TK_StartLocalError ); };
		"$^" => { token( TK_AllLocalError ); };
		"%^" => { token( TK_FinalLocalError ); };
		"<^" => { token( TK_NotStartLocalError ); };
		"@^" => { token( TK_NotFinalLocalError ); };
		"<>^" => { token( TK_MiddleLocalError ); };

		# Middle.
		"<>" => { token( TK_Middle ); };

		# Conditions.
		'>?' => { token( TK_StartCond ); };
		'$?' => { token( TK_AllCond ); };
		'%?' => { token( TK_LeavingCond ); };

		'..' => { token( TK_DotDot ); };
		'../i' => { token( TK_DotDotIndep ); };

		'**' => { token( TK_StarStar ); };
		'--' => { token( TK_DashDash ); };
		'->' => { token( TK_Arrow ); };
		'=>' => { token( TK_DoubleArrow ); };

		":>" => { token( TK_ColonGt ); };
		":>>" => { token( TK_ColonGtGt ); };
		"<:" => { token( TK_LtColon ); };

		":nfa(" => { token( TK_ColonNfaOpen ); };
		":cond(" => { token( TK_ColonCondOpen ); };
		":condstar(" => { token( TK_ColonCondStarOpen ); };
		":condplus(" => { token( TK_ColonCondPlusOpen ); };
		":nomax(" => { token( TK_ColonNoMaxOpen ); };
		"):" => { token( TK_CloseColon ); };

		# Opening of longest match.
		"|*" => { token( TK_BarStar ); };

		# Separator for name references.
		"::" => { token( TK_NameSep, ts, te ); };

		'}%%' => {
			updateCol();
			endSection();
			fret;
		};

		[ \t\r]+ => { updateCol(); };

		# If we are in a single line machine then newline may end the spec.
		NL => {
			updateCol();
			if ( singleLineSpec ) {
				endSection();
				fret;
			}
		};

		'{' => {
			if ( lastToken == KW_Export || lastToken == KW_Entry )
				token( '{' );
			else {
				token( '{' );
				curly_count = 1;
				inlineBlockType = CurlyDelimited;
				fcall inline_code;
			}
		};

		EOF => {
			id->error(scan_loc()) << "unterminated ragel section" << endl;
		};

		any => { token( *ts ); } ;
	*|;

	# Outside code scanner. These tokens get passed through.
	main := |*
		'define' => { pass( IMP_Define, 0, 0 ); };
		ident => { pass( IMP_Word, ts, te ); };
		number => { pass( IMP_UInt, ts, te ); };
		c_cpp_comment => { pass(); };
		( s_literal | d_literal ) => { pass( IMP_Literal, ts, te ); };

		'%%{' => {
			updateCol();
			singleLineSpec = false;
			startSection();
			fcall parser_def;
		};
		'%%' => {
			updateCol();
			singleLineSpec = true;
			startSection();
			fcall parser_def;
		};
		whitespace+ => { pass(); };
		EOF;
		any => { pass( *ts, 0, 0 ); };
	*|;
}%%

%% write data;

/* Scan the whole input stream with the rlscan machine. Input is read into a
 * buffer that starts at 8 bytes and doubles whenever a single token fills it.
 * After each read the unconsumed token prefix (from ts) is shifted to the
 * front of the buffer. A read of zero bytes is treated as end of input. */
void Scanner::do_scan()
{
	int bufsize = 8;
	char *buf = new char[bufsize];
	int cs, act, have = 0;
	int top;

	/* The stack is two deep, one level for going into ragel defs from the main
	 * machines which process outside code, and another for going into or literals
	 * from either a ragel spec, or a regular expression. */
	int stack[2];
	int curly_count = 0;
	bool execute = true;
	bool singleLineSpec = false;
	InlineBlockType inlineBlockType = CurlyDelimited;

	line = 1;
	column = 1;
	lastnl = 0;

	/* Init the section parser and the character scanner. */
	init();
	%% write init;

	/* Set up the start state. FIXME: After 5.20 is released the nocs write
	 * init option should be used, the main machine eliminated and this statement moved
	 * above the write init. */
	cs = rlscan_en_main;

	while ( execute ) {
		char *p = buf + have;
		int space = bufsize - have;

		if ( space == 0 ) {
			/* We filled up the buffer trying to scan a token. Grow it. */
			bufsize = bufsize * 2;
			char *newbuf = new char[bufsize];

			/* Recompute p and space. */
			p = newbuf + have;
			space = bufsize - have;

			/* Patch up pointers possibly in use. */
			if ( ts != 0 )
				ts = newbuf + ( ts - buf );
			te = newbuf + ( te - buf );

			/* Copy the new buffer in. */
			memcpy( newbuf, buf, have );
			delete[] buf;
			buf = newbuf;
		}

		input.read( p, space );
		int len = input.gcount();
		char *pe = p + len;

		/* If we see eof then append the eof var. */
		char *eof = 0;
		if ( len == 0 ) {
			eof = pe;
			execute = false;
		}

		%% write exec;

		/* Check if we failed. */
		if ( cs == rlscan_error ) {
			/* Machine failed before finding a token. I'm not yet sure if this
			 * is reachable. */
			id->error(scan_loc()) << "scanner error" << endl;
			id->abortCompile( 1 );
		}

		/* Decide if we need to preserve anything. */
		char *preserve = ts;

		/* Now set up the prefix. */
		if ( preserve == 0 )
			have = 0;
		else {
			/* There is data that needs to be shifted over. */
			have = pe - preserve;
			memmove( buf, preserve, have );
			unsigned int shiftback = preserve - buf;
			if ( ts != 0 )
				ts -= shiftback;
			te -= shiftback;

			preserve = buf;
		}
	}

	delete[] buf;
}