summaryrefslogtreecommitdiff
path: root/ragel/rlscan.rl
diff options
context:
space:
mode:
Diffstat (limited to 'ragel/rlscan.rl')
-rw-r--r--	ragel/rlscan.rl	1193
1 files changed, 0 insertions, 1193 deletions
diff --git a/ragel/rlscan.rl b/ragel/rlscan.rl
deleted file mode 100644
index f745b9a0..00000000
--- a/ragel/rlscan.rl
+++ /dev/null
@@ -1,1193 +0,0 @@
-/*
- * Copyright 2006-2007 Adrian Thurston <thurston@colm.net>
- * Copyright 2011 Josef Goettgens
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <iostream>
-#include <fstream>
-#include <string.h>
-
-#include "ragel.h"
-#include "rlscan.h"
-#include "inputdata.h"
-
-//#define LOG_TOKENS
-
-using std::ifstream;
-using std::istream;
-using std::ostream;
-using std::endl;
-
-enum InlineBlockType
-{
- CurlyDelimited,
- SemiTerminated
-};
-
-char *newTokdata( int toklen )
-{
- char *tokdata = new char[sizeof(TokHead) + toklen + 1];
- return tokdata + sizeof(TokHead);
-}
-
-void deleteTokdata( char *tokdata )
-{
- if ( tokdata )
- delete[] ( tokdata - sizeof(TokHead) );
-}
-
-void linkTokdata( Parser6 *parser, char *tokdata )
-{
- TokHead *head = (TokHead*)( tokdata - sizeof(TokHead) );
- head->next = parser->tokHead;
- parser->tokHead = head;
-}
-
-void clearTokdata( Parser6 *parser )
-{
- while ( parser->tokHead != 0 ) {
- TokHead *next = parser->tokHead->next;
- delete[] (char*)parser->tokHead;
- parser->tokHead = next;
- }
-}
-
-/*
- * The Scanner for Importing
- */
-
-%%{
- machine inline_token_scan;
- alphtype int;
- access tok_;
-
- # Import scanner tokens.
- import "rlparse.h";
-
- main := |*
- # Define of number.
- IMP_Define IMP_Word IMP_UInt => {
- int base = tok_ts - token_data;
- int nameOff = 1;
- int numOff = 2;
-
- directToParser( inclToParser, fileName, line, column, TK_Word,
- token_strings[base+nameOff], token_lens[base+nameOff] );
- directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
- directToParser( inclToParser, fileName, line, column, TK_UInt,
- token_strings[base+numOff], token_lens[base+numOff] );
- directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
- };
-
- # Assignment of number.
- IMP_Word '=' IMP_UInt => {
- int base = tok_ts - token_data;
- int nameOff = 0;
- int numOff = 2;
-
- directToParser( inclToParser, fileName, line, column, TK_Word,
- token_strings[base+nameOff], token_lens[base+nameOff] );
- directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
- directToParser( inclToParser, fileName, line, column, TK_UInt,
- token_strings[base+numOff], token_lens[base+numOff] );
- directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
- };
-
- # Define of literal.
- IMP_Define IMP_Word IMP_Literal => {
- int base = tok_ts - token_data;
- int nameOff = 1;
- int litOff = 2;
-
- directToParser( inclToParser, fileName, line, column, TK_Word,
- token_strings[base+nameOff], token_lens[base+nameOff] );
- directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
- directToParser( inclToParser, fileName, line, column, TK_Literal,
- token_strings[base+litOff], token_lens[base+litOff] );
- directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
- };
-
- # Assignment of literal.
- IMP_Word '=' IMP_Literal => {
- int base = tok_ts - token_data;
- int nameOff = 0;
- int litOff = 2;
-
- directToParser( inclToParser, fileName, line, column, TK_Word,
- token_strings[base+nameOff], token_lens[base+nameOff] );
- directToParser( inclToParser, fileName, line, column, '=', 0, 0 );
- directToParser( inclToParser, fileName, line, column, TK_Literal,
- token_strings[base+litOff], token_lens[base+litOff] );
- directToParser( inclToParser, fileName, line, column, ';', 0, 0 );
- };
-
- # Catch everything else.
- any;
- *|;
-}%%
-
-%% write data;
-
-void Scanner::flushImport()
-{
- int *p = token_data;
- int *pe = token_data + cur_token;
- int *eof = 0;
-
- %%{
- machine inline_token_scan;
- write init;
- write exec;
- }%%
-
- if ( tok_ts == 0 )
- cur_token = 0;
- else {
- cur_token = pe - tok_ts;
- int ts_offset = tok_ts - token_data;
- memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) );
- memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) );
- memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) );
- }
-}
-
-void Scanner::directToParser( Parser6 *toParser, const char *tokFileName, int tokLine,
- int tokColumn, int type, char *tokdata, int toklen )
-{
- InputLoc loc;
-
- #ifdef LOG_TOKENS
- cerr << "scanner:" << tokLine << ":" << tokColumn <<
- ": sending token to the parser " << Parser6_lelNames[type];
- cerr << " " << toklen;
- if ( tokdata != 0 )
- cerr << " " << tokdata;
- cerr << endl;
- #endif
-
- loc.fileName = tokFileName;
- loc.line = tokLine;
- loc.col = tokColumn;
-
- toParser->token( loc, type, tokdata, toklen );
-}
-
-void Scanner::importToken( int token, char *start, char *end )
-{
- if ( cur_token == max_tokens )
- flushImport();
-
- token_data[cur_token] = token;
- if ( start == 0 ) {
- token_strings[cur_token] = 0;
- token_lens[cur_token] = 0;
- }
- else {
- int toklen = end-start;
- token_lens[cur_token] = toklen;
- token_strings[cur_token] = new char[toklen+1];
- memcpy( token_strings[cur_token], start, toklen );
- token_strings[cur_token][toklen] = 0;
- }
- cur_token++;
-}
-
-void Scanner::pass()
-{
- if ( sectionPass )
- return;
-
- updateCol();
-
- /* If no errors and we are at the bottom of the include stack (the
- * source file listed on the command line) then write out the data. */
- if ( includeDepth == 0 && id->machineSpec == 0 && id->machineName == 0 )
- id->curItem->data.write( ts, te-ts );
-}
-
-void Scanner::pass( int token, char *start, char *end )
-{
- if ( sectionPass )
- return;
-
- if ( importMachines )
- importToken( token, start, end );
-
- pass();
-}
-
-/*
- * The scanner for processing sections, includes, imports, etc.
- */
-
-%%{
- machine section_parse;
- alphtype int;
- write data;
-}%%
-
-void Scanner::init( )
-{
- %% write init;
-}
-
-bool Scanner::active()
-{
- if ( ignoreSection )
- return false;
-
- if ( parser == 0 && ! parserExistsError ) {
- id->error(scan_loc()) << "this specification has no name, nor does any previous"
- " specification" << endl;
- parserExistsError = true;
- }
-
- if ( parser == 0 )
- return false;
-
- return true;
-}
-
-InputLoc Scanner::scan_loc()
-{
- return makeInputLoc( fileName, line, column );
-}
-
-void Scanner::updateCol()
-{
- char *from = lastnl;
- if ( from == 0 )
- from = ts;
- column += te - from;
- lastnl = 0;
-}
-
-void Scanner::handleMachine()
-{
- if ( sectionPass ) {
- /* Assign a name to the machine. */
- char *machine = word;
-
- SectionDictEl *sdEl = id->sectionDict.find( machine );
- if ( sdEl == 0 ) {
- sdEl = new SectionDictEl( machine );
- sdEl->value = new Section( machine );
- id->sectionDict.insert( sdEl );
- }
-
- section = sdEl->value;
- }
- else {
-
- /* Assign a name to the machine. */
- char *machine = word;
-
- if ( !importMachines && inclSectionTarg == 0 ) {
- ignoreSection = false;
-
- ParserDictEl *pdEl = id->parserDict.find( machine );
- if ( pdEl == 0 ) {
- pdEl = new ParserDictEl( machine );
- pdEl->value = new Parser6( id, fileName, machine, sectionLoc,
- id->hostLang, id->minimizeLevel, id->minimizeOpt );
- pdEl->value->init();
- id->parserDict.insert( pdEl );
- id->parserList.append( pdEl->value );
-
- /* Also into the parse data dict. This is the new style. */
- ParseDataDictEl *pddEl = new ParseDataDictEl( machine );
- pddEl->value = pdEl->value->pd;
- id->parseDataDict.insert( pddEl );
- id->parseDataList.append( pddEl->value );
- }
-
- parser = pdEl->value;
- }
- else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) {
- /* found include target */
- ignoreSection = false;
- parser = inclToParser;
- }
- else {
- /* ignoring section */
- ignoreSection = true;
- parser = 0;
- }
- }
-}
-
-void Scanner::handleInclude()
-{
- if ( sectionPass )
- return;
-
- if ( active() ) {
- char *inclSectionName = word;
- const char **includeChecks = 0;
-
- /* Implement defaults for the input file and section name. */
- if ( inclSectionName == 0 )
- inclSectionName = parser->sectionName;
-
- if ( lit != 0 ) {
- long length = 0;
- bool caseInsensitive = false;
- char *data = prepareLitString( id, InputLoc(), lit, lit_len, length, caseInsensitive );
-
- includeChecks = parser->pd->id->makeIncludePathChecks( fileName, data );
- }
- else {
- char *test = new char[strlen(fileName)+1];
- strcpy( test, fileName );
-
- includeChecks = new const char*[2];
-
- includeChecks[0] = test;
- includeChecks[1] = 0;
- }
-
- long found = 0;
- ifstream *inFile = parser->pd->id->tryOpenInclude( includeChecks, found );
- if ( inFile == 0 ) {
- id->error(scan_loc()) << "include: failed to locate file" << endl;
- const char **tried = includeChecks;
- while ( *tried != 0 )
- id->error(scan_loc()) << "include: attempted: \"" << *tried++ << '\"' << endl;
- }
- else {
- /* Don't include anything that's already been included. */
- if ( !parser->pd->duplicateInclude( includeChecks[found], inclSectionName ) ) {
- parser->pd->includeHistory.push_back( IncludeHistoryItem(
- includeChecks[found], inclSectionName ) );
-
- Scanner scanner( id, includeChecks[found], *inFile, parser,
- inclSectionName, includeDepth+1, false );
- scanner.do_scan( );
- }
-
- delete inFile;
- }
- }
-}
-
-void Scanner::handleImport()
-{
- if ( sectionPass )
- return;
-
- if ( active() ) {
- long length = 0;
- bool caseInsensitive = false;
- char *data = prepareLitString( id, InputLoc(), lit, lit_len, length, caseInsensitive );
-
- const char **importChecks = parser->pd->id->makeIncludePathChecks( fileName, data );
-
- /* Open the input file for reading. */
- long found = 0;
- ifstream *inFile = parser->pd->id->tryOpenInclude( importChecks, found );
- if ( inFile == 0 ) {
- id->error(scan_loc()) << "import: could not open import file " <<
- "for reading" << endl;
- const char **tried = importChecks;
- while ( *tried != 0 )
- id->error(scan_loc()) << "import: attempted: \"" << *tried++ << '\"' << endl;
- }
-
- Scanner scanner( id, importChecks[found], *inFile, parser,
- 0, includeDepth+1, true );
- scanner.do_scan( );
- scanner.importToken( 0, 0, 0 );
- scanner.flushImport();
- delete inFile;
- }
-}
-
-%%{
- machine section_parse;
-
- # Need the defines representing tokens.
- import "rlparse.h";
-
- action clear_words { word = lit = 0; word_len = lit_len = 0; }
- action store_word { word = tokdata; word_len = toklen; }
- action store_lit { lit = tokdata; lit_len = toklen; }
-
- action mach_err { id->error(scan_loc()) << "bad machine statement" << endl; }
- action incl_err { id->error(scan_loc()) << "bad include statement" << endl; }
- action import_err { id->error(scan_loc()) << "bad import statement" << endl; }
- action write_err { id->error(scan_loc()) << "bad write statement" << endl; }
-
- action handle_machine { handleMachine(); }
- action handle_include { handleInclude(); }
- action handle_import { handleImport(); }
-
- machine_stmt =
- ( KW_Machine TK_Word @store_word ';' ) @handle_machine
- <>err mach_err <>eof mach_err;
-
- include_names = (
- TK_Word @store_word ( TK_Literal @store_lit )? |
- TK_Literal @store_lit
- ) >clear_words;
-
- include_stmt =
- ( KW_Include include_names ';' ) @handle_include
- <>err incl_err <>eof incl_err;
-
- import_stmt =
- ( KW_Import TK_Literal @store_lit ';' ) @handle_import
- <>err import_err <>eof import_err;
-
- action write_command
- {
- if ( sectionPass ) {
- InputItem *inputItem = new InputItem;
- inputItem->type = InputItem::Write;
- inputItem->loc.fileName = fileName;
- inputItem->loc.line = line;
- inputItem->loc.col = column;
- inputItem->name = section->sectionName;
- inputItem->section = section;
-
- /* Track the last reference. */
- inputItem->section->lastReference = inputItem;
-
- id->inputItems.append( inputItem );
- }
- else {
- if ( includeDepth == 0 && active() &&
- id->machineSpec == 0 && id->machineName == 0 )
- {
- id->curItem = id->curItem->next;
- id->curItem->pd = parser->pd;
- id->curItem->parser = parser;
- id->checkLastRef( id->curItem );
- }
- }
- }
-
- action write_arg
- {
- if ( sectionPass ) {
- }
- else {
- if ( active() && id->machineSpec == 0 && id->machineName == 0 )
- id->curItem->writeArgs.push_back( strdup(tokdata) );
- }
- }
-
- action write_close
- {
- if ( sectionPass ) {
- }
- else {
- /* if ( active() && id->machineSpec == 0 && id->machineName == 0 )
- * id->curItem->writeArgs.append( 0 ); */
- }
- }
-
- write_stmt =
- ( KW_Write @write_command
- ( TK_Word @write_arg )+ ';' @write_close )
- <>err write_err <>eof write_err;
-
- action handle_token
- {
- if ( sectionPass ) {
- deleteTokdata( tokdata );
- }
- else {
- /* Send the token off to the parser. */
- if ( active() ) {
- if ( tokdata != 0 ) {
- linkTokdata( parser, tokdata );
- }
-
- directToParser( parser, fileName, line, column, type, tokdata, toklen );
- }
- else {
- deleteTokdata( tokdata );
- }
- }
- }
-
- # Catch everything else.
- everything_else =
- ^( KW_Machine | KW_Include | KW_Import | KW_Write ) @handle_token;
-
- main := (
- machine_stmt |
- include_stmt |
- import_stmt |
- write_stmt |
- everything_else
- )*;
-}%%
-
-void Scanner::token( int type, char c )
-{
- token( type, &c, &c + 1 );
-}
-
-void Scanner::token( int type )
-{
- token( type, 0, 0 );
-}
-
-
-void Scanner::token( int type, char *start, char *end )
-{
- char *tokdata = 0;
- int toklen = 0;
- if ( start != 0 ) {
- toklen = end-start;
- tokdata = newTokdata( toklen + 1 );
- memcpy( tokdata, start, toklen );
- tokdata[toklen] = 0;
- }
-
- processToken( type, tokdata, toklen );
-}
-
-void Scanner::processToken( int type, char *tokdata, int toklen )
-{
- int *p, *pe, *eof;
-
- if ( type < 0 )
- p = pe = eof = 0;
- else {
- p = &type;
- pe = &type + 1;
- eof = 0;
- }
-
- %%{
- machine section_parse;
- write exec;
- }%%
-
- updateCol();
-
- /* Record the last token for use in controlling the scan of subsequent
- * tokens. */
- lastToken = type;
-}
-
-void Scanner::startSection( )
-{
- parserExistsError = false;
-
- sectionLoc.fileName = fileName;
- sectionLoc.line = line;
- sectionLoc.col = column;
-}
-
-void Scanner::endSection( )
-{
- /* Execute the eof actions for the section parser. */
- processToken( -1, 0, 0 );
-
- if ( sectionPass ) {
- InputItem *inputItem = new InputItem;
- inputItem->type = InputItem::EndSection;
- inputItem->loc.fileName = fileName;
- inputItem->loc.line = line;
- inputItem->loc.col = column;
- id->inputItems.append( inputItem );
- if ( section != 0 ) {
- inputItem->section = section;
- section->lastReference = inputItem;
- }
-
- if ( includeDepth == 0 ) {
- if ( id->machineSpec == 0 && id->machineName == 0 ) {
- /* The end section may include a newline on the end, so
- * we use the last line, which will count the newline. */
- InputItem *inputItem = new InputItem;
- inputItem->type = InputItem::HostData;
- inputItem->loc.fileName = fileName;
- inputItem->loc.line = line;
- inputItem->loc.col = column;
- id->inputItems.append( inputItem );
- }
- }
- }
- else {
- /* Close off the section with the parser. */
- if ( includeDepth == 0 && active() ) {
- InputLoc loc;
- loc.fileName = fileName;
- loc.line = line;
- loc.col = column;
-
- parser->token( loc, TK_EndSection, 0, 0 );
-
- id->curItem = id->curItem->next;
-
- if ( parser != 0 ) {
- id->curItem->pd = parser->pd;
- id->curItem->parser = parser;
- }
-
- id->checkLastRef( id->curItem );
- }
-
- if ( includeDepth == 0 ) {
- if ( id->machineSpec == 0 && id->machineName == 0 ) {
- id->curItem = id->curItem->next;
- id->checkLastRef( id->curItem );
- }
- }
- }
-}
-
-%%{
- machine rlscan;
-
- # This is sent by the driver code.
- EOF = 0;
-
- action inc_nl {
- lastnl = p;
- column = 0;
- line++;
- }
- NL = '\n' @inc_nl;
-
-	# Identifiers, numbers, comments, and other common things.
- ident = ( alpha | '_' ) ( alpha |digit |'_' )*;
- ocaml_ident = ( alpha | '_' ) ( alpha |digit |'_' )* "'"?;
- number = digit+;
- hex_number = '0x' [0-9a-fA-F]+;
-
- c_comment =
- '/*' ( any | NL )* :>> '*/';
-
- cpp_comment =
- '//' [^\n]* NL;
-
- c_cpp_comment = c_comment | cpp_comment;
-
- ruby_comment = '#' [^\n]* NL;
-
- # These literal forms are common to host code and ragel.
- s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'";
- d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"';
- host_re_literal = '/' ([^/\\] | NL | '\\' (any | NL))* '/';
-
- whitespace = [ \t] | NL;
- pound_comment = '#' [^\n]* NL;
-
- # An inline block of code for languages other than Ruby.
- inline_code := |*
- # Inline expression keywords.
- "fpc" => { token( KW_PChar ); };
- "fc" => { token( KW_Char ); };
- "fcurs" => { token( KW_CurState ); };
- "ftargs" => { token( KW_TargState ); };
- "fentry" => {
- whitespaceOn = false;
- token( KW_Entry );
- };
-
- # Inline statement keywords.
- "fhold" => {
- whitespaceOn = false;
- token( KW_Hold );
- };
- "fexec" => { token( KW_Exec, 0, 0 ); };
- "fgoto" => {
- whitespaceOn = false;
- token( KW_Goto );
- };
- "fnext" => {
- whitespaceOn = false;
- token( KW_Next );
- };
- "fcall" => {
- whitespaceOn = false;
- token( KW_Call );
- };
- "fret" => {
- whitespaceOn = false;
- token( KW_Ret );
- };
- "fbreak" => {
- whitespaceOn = false;
- token( KW_Break );
- };
- "fncall" => {
- whitespaceOn = false;
- token( KW_Ncall );
- };
- "fnret" => {
- whitespaceOn = false;
- token( KW_Nret );
- };
- "fnbreak" => {
- whitespaceOn = false;
- token( KW_Nbreak );
- };
-
- ident => { token( TK_Word, ts, te ); };
-
- number => { token( TK_UInt, ts, te ); };
- hex_number => { token( TK_Hex, ts, te ); };
-
- ( s_literal | d_literal )
- => { token( IL_Literal, ts, te ); };
-
- whitespace+ => {
- if ( whitespaceOn )
- token( IL_WhiteSpace, ts, te );
- };
-
- c_cpp_comment => { token( IL_Comment, ts, te ); };
-
- "::" => { token( TK_NameSep, ts, te ); };
-
-	# Some symbols need to go to the parser with their cardinal value as
- # the token type (as opposed to being sent as anonymous symbols)
- # because they are part of the sequences which we interpret. The * ) ;
- # symbols cause whitespace parsing to come back on. This gets turned
- # off by some keywords.
-
- ";" => {
- whitespaceOn = true;
- token( *ts, ts, te );
- if ( inlineBlockType == SemiTerminated )
- fret;
- };
-
- "$" [a-zA-Z_][a-zA-Z_0-9]* => {
- if ( parser != 0 && parser->parseSubstitutions )
- token( TK_SubstRef, ts+1, te );
- else {
- token( IL_Symbol, ts, ts+1 );
- fexec ts+1;
- }
- };
-
- [*)] => {
- whitespaceOn = true;
- token( *ts, ts, te );
- };
-
- [,(] => { token( *ts, ts, te ); };
-
- '{' => {
- token( IL_Symbol, ts, te );
- curly_count += 1;
- };
-
- '}' => {
- if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) {
- /* Inline code block ends. */
- token( '}' );
- fret;
- }
- else {
- /* Either a semi terminated inline block or only the closing
- * brace of some inner scope, not the block's closing brace. */
- token( IL_Symbol, ts, te );
- }
- };
-
- EOF => {
- id->error(scan_loc()) << "unterminated code block" << endl;
- };
-
- # Send every other character as a symbol.
- any => { token( IL_Symbol, ts, te ); };
- *|;
-
- or_literal := |*
- # Escape sequences in OR expressions.
- '\\0' => { token( RE_Char, '\0' ); };
- '\\a' => { token( RE_Char, '\a' ); };
- '\\b' => { token( RE_Char, '\b' ); };
- '\\t' => { token( RE_Char, '\t' ); };
- '\\n' => { token( RE_Char, '\n' ); };
- '\\v' => { token( RE_Char, '\v' ); };
- '\\f' => { token( RE_Char, '\f' ); };
- '\\r' => { token( RE_Char, '\r' ); };
- '\\\n' => { updateCol(); };
- '\\' any => { token( RE_Char, ts+1, te ); };
-
- # Range dash in an OR expression.
- '-' => { token( RE_Dash, 0, 0 ); };
-
- # Terminate an OR expression.
- ']' => { token( RE_SqClose ); fret; };
-
- EOF => {
- id->error(scan_loc()) << "unterminated OR literal" << endl;
- };
-
- # Characters in an OR expression.
- [^\]] => { token( RE_Char, ts, te ); };
-
- *|;
-
- ragel_re_literal := |*
- # Escape sequences in regular expressions.
- '\\0' => { token( RE_Char, '\0' ); };
- '\\a' => { token( RE_Char, '\a' ); };
- '\\b' => { token( RE_Char, '\b' ); };
- '\\t' => { token( RE_Char, '\t' ); };
- '\\n' => { token( RE_Char, '\n' ); };
- '\\v' => { token( RE_Char, '\v' ); };
- '\\f' => { token( RE_Char, '\f' ); };
- '\\r' => { token( RE_Char, '\r' ); };
- '\\\n' => { updateCol(); };
- '\\' any => { token( RE_Char, ts+1, te ); };
-
- # Terminate an OR expression.
- '/' [i]? => {
- token( RE_Slash, ts, te );
- fgoto parser_def;
- };
-
- # Special characters.
- '.' => { token( RE_Dot ); };
- '*' => { token( RE_Star ); };
-
- '[' => { token( RE_SqOpen ); fcall or_literal; };
- '[^' => { token( RE_SqOpenNeg ); fcall or_literal; };
-
- EOF => {
- id->error(scan_loc()) << "unterminated regular expression" << endl;
- };
-
- # Characters in an OR expression.
- [^\/] => { token( RE_Char, ts, te ); };
- *|;
-
- # We need a separate token space here to avoid the ragel keywords.
- write_statement := |*
- ident => { token( TK_Word, ts, te ); } ;
- [ \t\n]+ => { updateCol(); };
- ';' => { token( ';' ); fgoto parser_def; };
-
- EOF => {
- id->error(scan_loc()) << "unterminated write statement" << endl;
- };
- *|;
-
- # Parser definitions.
- parser_def := |*
- #'length_cond' => { token( KW_Length ); };
- 'machine' => { token( KW_Machine ); };
- 'include' => { token( KW_Include ); };
- 'import' => { token( KW_Import ); };
- 'write' => {
- token( KW_Write );
- fgoto write_statement;
- };
- 'action' => { token( KW_Action ); };
- 'alphtype' => { token( KW_AlphType ); };
- 'prepush' => { token( KW_PrePush ); };
- 'postpop' => { token( KW_PostPop ); };
-
- 'nfaprepush' => { token( KW_NfaPrePush ); };
- 'nfapostpop' => { token( KW_NfaPostPop ); };
-
- # FIXME: Enable this post 5.17.
- # 'range' => { token( KW_Range ); };
-
- 'getkey' => {
- token( KW_GetKey );
- inlineBlockType = SemiTerminated;
- fcall inline_code;
- };
- 'access' => {
- token( KW_Access );
- inlineBlockType = SemiTerminated;
- fcall inline_code;
- };
- 'variable' => {
- token( KW_Variable );
- inlineBlockType = SemiTerminated;
- fcall inline_code;
- };
- 'when' => { token( KW_When ); };
- 'inwhen' => { token( KW_InWhen ); };
- 'outwhen' => { token( KW_OutWhen ); };
- 'eof' => { token( KW_Eof ); };
- 'err' => { token( KW_Err ); };
- 'lerr' => { token( KW_Lerr ); };
- 'to' => { token( KW_To ); };
- 'from' => { token( KW_From ); };
- 'export' => { token( KW_Export ); };
-
- # Identifiers.
- ident => { token( TK_Word, ts, te ); } ;
-
- # Numbers
- number => { token( TK_UInt, ts, te ); };
- hex_number => { token( TK_Hex, ts, te ); };
-
- # Literals, with optionals.
- ( s_literal | d_literal ) [i]?
- => { token( TK_Literal, ts, te ); };
-
- '[' => { token( RE_SqOpen ); fcall or_literal; };
- '[^' => { token( RE_SqOpenNeg ); fcall or_literal; };
-
- '/' => { token( RE_Slash ); fgoto ragel_re_literal; };
-
- # Ignore.
- pound_comment => { updateCol(); };
-
- ':=' => { token( TK_ColonEquals ); };
- '|=' => { token( TK_BarEquals ); };
-
- # To State Actions.
- ">~" => { token( TK_StartToState ); };
- "$~" => { token( TK_AllToState ); };
- "%~" => { token( TK_FinalToState ); };
- "<~" => { token( TK_NotStartToState ); };
- "@~" => { token( TK_NotFinalToState ); };
- "<>~" => { token( TK_MiddleToState ); };
-
- # From State actions
- ">*" => { token( TK_StartFromState ); };
- "$*" => { token( TK_AllFromState ); };
- "%*" => { token( TK_FinalFromState ); };
- "<*" => { token( TK_NotStartFromState ); };
- "@*" => { token( TK_NotFinalFromState ); };
- "<>*" => { token( TK_MiddleFromState ); };
-
- # EOF Actions.
- ">/" => { token( TK_StartEOF ); };
- "$/" => { token( TK_AllEOF ); };
- "%/" => { token( TK_FinalEOF ); };
- "</" => { token( TK_NotStartEOF ); };
- "@/" => { token( TK_NotFinalEOF ); };
- "<>/" => { token( TK_MiddleEOF ); };
-
- # Global Error actions.
- ">!" => { token( TK_StartGblError ); };
- "$!" => { token( TK_AllGblError ); };
- "%!" => { token( TK_FinalGblError ); };
- "<!" => { token( TK_NotStartGblError ); };
- "@!" => { token( TK_NotFinalGblError ); };
- "<>!" => { token( TK_MiddleGblError ); };
-
- # Local error actions.
- ">^" => { token( TK_StartLocalError ); };
- "$^" => { token( TK_AllLocalError ); };
- "%^" => { token( TK_FinalLocalError ); };
- "<^" => { token( TK_NotStartLocalError ); };
- "@^" => { token( TK_NotFinalLocalError ); };
- "<>^" => { token( TK_MiddleLocalError ); };
-
- # Middle.
- "<>" => { token( TK_Middle ); };
-
- # Conditions.
- '>?' => { token( TK_StartCond ); };
- '$?' => { token( TK_AllCond ); };
- '%?' => { token( TK_LeavingCond ); };
-
- '..' => { token( TK_DotDot ); };
- '../i' => { token( TK_DotDotIndep ); };
-
- '**' => { token( TK_StarStar ); };
- '--' => { token( TK_DashDash ); };
- '->' => { token( TK_Arrow ); };
- '=>' => { token( TK_DoubleArrow ); };
-
- ":>" => { token( TK_ColonGt ); };
- ":>>" => { token( TK_ColonGtGt ); };
- "<:" => { token( TK_LtColon ); };
-
- ":nfa(" => { token( TK_ColonNfaOpen ); };
- ":cond(" => { token( TK_ColonCondOpen ); };
- ":condstar(" => { token( TK_ColonCondStarOpen ); };
- ":condplus(" => { token( TK_ColonCondPlusOpen ); };
- ":nomax(" => { token( TK_ColonNoMaxOpen ); };
- "):" => { token( TK_CloseColon ); };
-
- # Opening of longest match.
- "|*" => { token( TK_BarStar ); };
-
-	# Separator for name references.
- "::" => { token( TK_NameSep, ts, te ); };
-
- '}%%' => {
- updateCol();
- endSection();
- fret;
- };
-
- [ \t\r]+ => { updateCol(); };
-
- # If we are in a single line machine then newline may end the spec.
- NL => {
- updateCol();
- if ( singleLineSpec ) {
- endSection();
- fret;
- }
- };
-
- '{' => {
- if ( lastToken == KW_Export || lastToken == KW_Entry )
- token( '{' );
- else {
- token( '{' );
- curly_count = 1;
- inlineBlockType = CurlyDelimited;
- fcall inline_code;
- }
- };
-
- EOF => {
- id->error(scan_loc()) << "unterminated ragel section" << endl;
- };
-
- any => { token( *ts ); } ;
- *|;
-
- # Outside code scanner. These tokens get passed through.
- main := |*
- 'define' => { pass( IMP_Define, 0, 0 ); };
- ident => { pass( IMP_Word, ts, te ); };
- number => { pass( IMP_UInt, ts, te ); };
- c_cpp_comment => { pass(); };
- ( s_literal | d_literal ) => { pass( IMP_Literal, ts, te ); };
-
- '%%{' => {
- updateCol();
- singleLineSpec = false;
- startSection();
- fcall parser_def;
- };
- '%%' => {
- updateCol();
- singleLineSpec = true;
- startSection();
- fcall parser_def;
- };
- whitespace+ => { pass(); };
- EOF;
- any => { pass( *ts, 0, 0 ); };
- *|;
-}%%
-
-%% write data;
-
-void Scanner::do_scan()
-{
- int bufsize = 8;
- char *buf = new char[bufsize];
- int cs, act, have = 0;
- int top;
-
- /* The stack is two deep, one level for going into ragel defs from the main
- * machines which process outside code, and another for going into or literals
- * from either a ragel spec, or a regular expression. */
- int stack[2];
- int curly_count = 0;
- bool execute = true;
- bool singleLineSpec = false;
- InlineBlockType inlineBlockType = CurlyDelimited;
-
- line = 1;
- column = 1;
- lastnl = 0;
-
- /* Init the section parser and the character scanner. */
- init();
- %% write init;
-
- /* Set up the start state. FIXME: After 5.20 is released the nocs write
- * init option should be used, the main machine eliminated and this statement moved
- * above the write init. */
- cs = rlscan_en_main;
-
- while ( execute ) {
- char *p = buf + have;
- int space = bufsize - have;
-
- if ( space == 0 ) {
- /* We filled up the buffer trying to scan a token. Grow it. */
- bufsize = bufsize * 2;
- char *newbuf = new char[bufsize];
-
- /* Recompute p and space. */
- p = newbuf + have;
- space = bufsize - have;
-
- /* Patch up pointers possibly in use. */
- if ( ts != 0 )
- ts = newbuf + ( ts - buf );
- te = newbuf + ( te - buf );
-
- /* Copy the new buffer in. */
- memcpy( newbuf, buf, have );
- delete[] buf;
- buf = newbuf;
- }
-
- input.read( p, space );
- int len = input.gcount();
- char *pe = p + len;
-
- /* If we see eof then append the eof var. */
- char *eof = 0;
- if ( len == 0 ) {
- eof = pe;
- execute = false;
- }
-
- %% write exec;
-
- /* Check if we failed. */
- if ( cs == rlscan_error ) {
- /* Machine failed before finding a token. I'm not yet sure if this
- * is reachable. */
- id->error(scan_loc()) << "scanner error" << endl;
- id->abortCompile( 1 );
- }
-
- /* Decide if we need to preserve anything. */
- char *preserve = ts;
-
- /* Now set up the prefix. */
- if ( preserve == 0 )
- have = 0;
- else {
- /* There is data that needs to be shifted over. */
- have = pe - preserve;
- memmove( buf, preserve, have );
- unsigned int shiftback = preserve - buf;
- if ( ts != 0 )
- ts -= shiftback;
- te -= shiftback;
-
- preserve = buf;
- }
- }
-
- delete[] buf;
-}