From 4ebe2c5f8adb33ae1525197012e790e01f5ea341 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 14 Mar 2020 12:57:05 +0200 Subject: removing ragel source code, makefile definitions releated to ragel --- ragel/ChangeLog | 1709 ---------------------------------- ragel/Makefile.am | 36 +- ragel/load.cc | 88 -- ragel/longest.cc | 571 ------------ ragel/main.cc | 31 - ragel/parsedata.cc | 1490 ------------------------------ ragel/parsetree.cc | 2199 -------------------------------------------- ragel/rlparse.kh | 148 --- ragel/rlparse.kl | 1943 --------------------------------------- ragel/rlparse.lm | 207 ----- ragel/rlreduce.lm | 2570 ---------------------------------------------------- ragel/rlscan.rl | 1193 ------------------------ ragel/xml.cc | 786 ---------------- ragel/xml.h | 81 -- 14 files changed, 9 insertions(+), 13043 deletions(-) delete mode 100644 ragel/ChangeLog delete mode 100644 ragel/load.cc delete mode 100644 ragel/longest.cc delete mode 100644 ragel/main.cc delete mode 100644 ragel/parsedata.cc delete mode 100644 ragel/parsetree.cc delete mode 100644 ragel/rlparse.kh delete mode 100644 ragel/rlparse.kl delete mode 100644 ragel/rlparse.lm delete mode 100644 ragel/rlreduce.lm delete mode 100644 ragel/rlscan.rl delete mode 100644 ragel/xml.cc delete mode 100644 ragel/xml.h diff --git a/ragel/ChangeLog b/ragel/ChangeLog deleted file mode 100644 index d095cab5..00000000 --- a/ragel/ChangeLog +++ /dev/null @@ -1,1709 +0,0 @@ -Ragel 7.0.1 - Aug XXX, 2018 -=========================== - -Condition implemenation rewritten. Previously utilized an extension of the - alphabet space to encode "character when A". The character was looked up, the - applicable condition set determined, a constant value was added to the - character value, then the real transition looked up. Now a more natural - implemation has the transition encoded in two levels of lists. The first - level is indexed by the alphabet character, which is unmodified. 
This tells - us which condition set to execute. The result of the condition execution is - an integer that is looked up in a second level list. This new condition - implementation allows for a much less complicated implementation, and does not - rely on available bits in the alphabet space. - -Conditions now properly execute on EOF. - -Added a Condition-based repetition operator. - -Ragel frontend is now colm-based. The grammar is separated into a core ragel - grammar that can then be extended for different host languages. - -Intermediate codegen language was added and non-C/ASM code generators are now - based on the intermediate language. Separate ragel executable files are used - to implement the different host languages. - -Restrictions on action-based jumping to and calling of state machines in - languages where it cannot be implemented properly (no goto in host language). - These statements must be replaced with the "next" version, which does not - jump out of the action, but instead causes the jump/call after the action - list. - -Added NFA features. This includes repetition and support for large unions of - expressions. The operator can create a deterministic prefix, the depth of - which is configurable, before NFA alternation begins. - -Consolidating code in the different code generation styles. - -Ragel 6.10 - Mar 24, 2017 -========================= - -C codegen: test P vs PE in goto/call/ret statements in EOF actions, just - before re-entering. If at the end of the input block then the EOF check is - jumped to. This change prevents overrunning the buffer if control flow is - issued in an EOF action without fixing the input pointer first. If a program - properly issues an fhold before the control flow the program won't be - affected. - -Updated action label generation. The previous set of conditions for - generating the label didn't cover actions coming from the eofAction pointer - (eof trans covered since it points into the set of transitions). 
- -Use separate signed/unsigned values for host type min/max. Using separate - values avoids the need to type cast before the data goes into FsmCtx structs. - Keep it in native types until it is used. - -Optionally do not generate entry point variables. Adds noentry write option - for data. - -Various warning elimination and build updates. - -Ragel 6.9 - Oct 13, 2014 -======================== - -updated command-line synopsis - -ocaml: fix missing semicolon - -ocaml: support -G1 - -ocaml: choose a unique name for type state - -ruby: reduce the amount of calls to GET_WIDE_KEY() - -union test case: warning fix - -omit line directives around expression-oriented write statements - -use AS_IF and test command to check if the DIST file is present - -added missing std:: using - -go: added '//line' directive support - -Ragel 6.8 - Feb 11, 2013 -======================== - - -The -G2 code generator for Go1 was rewritten. Table, flat and switch-based - code generators were added. (Anton Ageev) - -The CXXFLAGS variable is not longer set in the configure script. - -Ragel 6.7 - May 22, 2011 -======================== - -The C vim file now supports L,l on the end of literal numbers, other syntax - highlighting fixes. - -Added an explicit cast when modifying _trans to eliminate compiler warnings - on Windows/VC++ - -Fix for ruby 1.9 compatibility. - -Fix for include directories (-I option) on Windows/VC++ - -Can now rename data variable in C# code generator. - -Fix for non-char alphtype in C# code generator. - -Fix for signedness of wide char in C code generator. when comparing the wide - type against a literal we need to pick the right signedness for the literal. - -Fixed arithmetic overflow in generated C# code. The C# _acts and _nacts vars - should not be typed based on the size of the array elements they point to. - Fixes a bug reported by Attila Sztupák. - -Made the -L option work for Ruby. - -Enhanced ragel.m4 (from Diego). - -Applied GO patch from Justine Tunney. 
- -Applied D2 patch from Johannes Pfau. - -Applied Ocaml patch from ygrek. - -Ragel 6.6 - Dec 2, 2009 -======================= - -Applied a number of patches from Diego Elio 'Flameeyes' Pettenò. Should not - be modifying the program's arguments. Problem raised by const correctness in - gcc 4.4. Other const-correctness and include fixes provided. - -Fixed improper allocation of checks in makeIncludePathChecks. - -Fixed segfault when there are no machine instantiations. - -Fixed wrong line directives. Line directives need to use the fileName stored - in the InputLoc structures from the parse trees, not the root source file, - otherwise actions in included files will have the wrong source file names - associated with the text. - -Made a number of build system improvements. We locate the DIST file using - $srcdir and source it. It contains settings for build_parsers and - build_manual. This allows the user of a dist to enable only one. - -Added missing files to doc/Makefile.am and examples/Makefile.am. - -Added checks for pdflatex and fig2dev if build_manual is on. - -Use automake --foreign so we don't need to have INSTALL and NEWS present. - -Ragel VIM syntax files should be specialized by host language. Updated the - VIM syntax files. - -Added examples to the dist. Added unicode2ragel.rb to EXTRA_DIST in contrib. - -Moved unicode2ragel.rb to the contrib directory. - -Ragel 6.5 - May 18, 2009 -======================== - -Fixed a bug in graphviz generation. Ragel crashed when using -V and -M and - the specified machine referenced another machine that wasn't included in the - build. - -The name "CS" is in use on OpenSolaris, changed to vCS to ease compiling - Ragel there. - -Converted to automake. - -REALLY fixed a bug that was intended to be fixed in 6.4: - Fixed a problem reading hex numbers that have the high bit set when the - alphabet is signed and we are on 64 bit. This was reported by _why. The - fix was provided by William Morgan. 
The literal 0xffffffff was used for - a fully set long when -1L should be used instead. - A null patch (whitespace changes) must have gotten checked after I was - testing with and without the critical one-line patch and I forgot to enable - make sure it was enabled in the final checkin version. - -Ragel 6.4 - Mar 22, 2009 -======================== - -Moved back to a single executable. The old intermediate format can still be - generated using the -x option. Ragel was split into frontend and backend - programs in version 5.0. This was done to encourage interoperability with - other tools. Since then, ragel has been made to work with qfsm, with ragel - producing the intermediate format and qfsm consuming it. However, there has - been no use of Ragel as a consumer of state machine data, with Ragel used as - a code generator for DFAs. This is not surprising given that much of the - complexity of Ragel is in the frontend, where the regular language to DFA - compilation happens. Since the full benefits of the split have not - materialized, and the split increases the complexity for users, Ragel has - been made once again into a single executable. - -Applied a fix to the documentation Makefile from John D. Mitchell. - -Use CXXFLAGS instead of CFLAGS for C++ compiling. Patch from Diego - 'Flameeyes' Pettenò. - -Added support for DESTDIR variable. Patch from Diego 'Flameeyes' Pettenò. - -Added a script called unicode2ragel.rb for generating unicode machines to - the examples directory. From Rakan El-Khalil. - -Fixed a copy-paste error in the documentation that was reported by Jose - Quinteiro. - -Added three new write commands: - write start; - write first_final; - write error; - These generate a reference to the start, first final and error state. When - there are many different machine specifications in one file it is easy to - get the prefix for these wrong (especially when you do a lot of copy-pasting - of boilerplate). 
The problem can be avoided by using write commands. - -Fixed a problem reading hex numbers that have the high bit set when the - alphabet is signed and we are on 64 bit. This was reported by _why. The fix - was provided by William Morgan. The literal 0xffffffff was used for a fully - set long when -1L should be used instead. - -Ragel 6.3 - Aug 29, 2008 -======================== - -Fixed an assertion that is too strong. In the condition code we need to copy - transitions that have non-empty lmActionTable arrays so we don't assert - emptiness in the constructor. Lift out the assertion and copy the array in - the constructor. - -Fixed and improved multiple include prevention. We now track the entire - include history of a parser state to prevent duplicates. - -Fixed crash on failed lookup of goto/call/etc target. - -Ragel 6.2 - May 9, 2008 -======================= - -Bug fix: The lm_switch actions need to set p from tokend when there is no - user action. - -Bug fix: when not using indices we can't use a transition's id to identify - the eof transition to take. Instead add the transition to the end of the - transition list and store its position in a new var called pos. The pos var - is then used as the index. - -Bug fix: an fnext followed by an fbreak in -G2 was not working. The fbreak - was not aware that the fnext causes the cs variable to be forced active. In - this case fbreak does not need to save cs because it is already current. - -Bug fix: need to compute the low and high character-space keys from the - condition-trans overlap when computing an expansion. Can't use the range - supplied from the condition overlap since they may not match. An incorrect - machine that accepted 1(!cond1, !cond2) was generated for the following - grammar. This bug was reported by Tim Chklovski. 
- c = 2 @matched_c; - sc1 = 1..2 when cond1; - sc2 = 1..2 when cond2; - main := sc1 | c | sc2; - -Bug fix: error messages in start label analysis of join operations were - causing assertion failures because location info was not set. Fixed by - adding locations. - -Include and import file searching now searches for the file name given based - on the location of the current file, not ragel's current path. - Additional search locations can be given using the -I option. - -Rubinius code generation was updated to the latest Rubinius. Patch from Evan - Phoenix. - -Switched from strcasecmp to strcmp for testing long arguments. - -Applied a patch from Andrei Polushin for setting the error message format. - --error-format=gnu (default) - --error-format=msvc - -Now using the _WIN32 define instead of _WIN32. Other MSVC compilation - improvements from Andrei Polushin. - -Added the hyperref package to the manual. - -Ragel 6.1 - Mar 26, 2008 -======================== - -Scanners now ensure that any leaving actions at the end of a pattern are - executed. They are always executed before the pattern action. - -Added an option -d for turning off the removal of duplicate actions from - actions lists. - -Need to unset the final state status of the start state in kleene star if it - is set. It is possible to crash ragel when the warning is ignored. - -In the dot file generation we need to print any actions that are in - State::eofTrans. These come from scanners only. - -Use @docdir@ for the docdir Makefile variable. - -Check for ar and ranlib in the configure script. - -Ragel 6.0 - Jan 12, 2008 -======================== - -Removed the 'noend' write option from examples/atoi.rl. This example is - referenced a lot as a first example and as such it shouldn't contain a - special purpose write option like 'noend'. - -Introduced the "eof" variable for indicating the end of file. The p variable - is checked against eof when the processing loop reaches the end of a block. 
- If p == eof at this time then the EOF actions are executed. The variable is - required only when EOF actions have been embedded. - -The "write eof" command is no longer needed and was removed. - -Scanners now use EOF actions to generate tokens. This eliminates the need to - flush the last token. - -Restructured the Java driver; a switch statement with fallthrough cases is - now used to emulate gotos. - -Ruby code generation was also restructured. Gotos are emulated using a - series of if tests. - -Went back to 3.X semantics for >, % and error actions. The > operator also - embeds a leaving action/priority into the start state if it is final. If EOF - happens in a state with a leaving operator then the leaving action is - executed. If EOF happens in a non-final state that has an error action, the - error action is executed. - -The "ragel" program now executes frontend and backend processes separately, - connecting them with a temporary file in the current directory. Without the - -x option the "ragel" program marshals arguments and calls the frontend and - backend. With the -x option the "ragel" program acts as the frontend only. - -Added name finding for executables. If any forward slash is found in argv0 - then it is assumed that the path is explicit and the path to the backend - executable should be derived from that. We check that location and also go - up one then inside a directory of the same name in case we are executing - from the source tree. If no forward slash is found it is assumed the file is - being run from the installed location. The PREFIX supplied during - configuration is used. - -On windows GetModuleFileNameEx is used to find out where the current - process's binary is. That location is searched first. If that fails then we - go up one directory and look for the executable inside a directory of the - same name in case we are executing from the source tree. 
- -Changed the -l option in rlgen-cd to -L because it is covered in the - frontend. Added a passthrough in the frontend for the backend options. - -Dot file generation can now be invoked using the -V option to ragel. We - now require an input file. If standard in is used then we don't have a file - name on which to base the output. - -Able to build native windows executables using Cygwin+MinGW. - -Patch from David Waite: Large arrays are now created by copying in the data - from smaller arrays using System.arraycopy(). This eliminates the debug data - associated with explicit initialization statements. It is also much easier - on the java compiler which can run out of memory compiling very large - machines. The downside is that it takes slightly longer to initialize static - data at run time. - -The fbreak statement now advances p. - -In the :> :>> and <: operators it was possible for the priority assignment - to be bypassed via the zero length string. In :> this was fixed - automatically with the semantics change to the entering priority operator. - If the start state is final it now embeds a leaving action into it, - preventing persistence through the zero length string. In :>> and <: this - was fixed explicitly. With <: the entering priority operator was used and - with :> a special exception was added. Since it uses the finishing - transition operator it also adds a leaving priority to the start state if it - is final. - -Ranlib is now run on the archives. Patch from Kenny MacDermid. - -The case statement syntax in ruby code generation used a form deprecated in - Ruby 1.9. Updated it. - -Made a number of fixes that eliminate warnings in GCC 4.3. Mostly concern - the now deprecated automatic conversion of string constants to "char*" type. - Other fixes include adding parentheses around && within ||. - -The "tokstart" and "tokend" variables were changed to "ts" and "te". 
- -Ragel 5.25 - Dec 24, 2007 -========================= - -Fixed segfault reported by Ryan Phelps. Affected Java and Ruby code - generation. The dataExpr variable was not initialized. - -Fixed incorrect case label in test/runtests. Caused Objective-C tests to be - ignored. - -Added missing include to common.cpp. - -Ragel 5.24 - Sep 16, 2007 -========================= - -Applied patch from Victor Hugo Borja. This patch - implements -T1 -F0 -F1 and -G0 in the ruby code generator. Goto-driven code - generation is experimental and requires rubinius asm directives (specify - with --rbx option). These code generators pass all the ruby tests. - -If the condition embedding code runs out of available characters in the - keyspace an error message is emitted. - -The first example that appeared in the manual used the special-purpose - 'noend' write option. This caused confusion. Now a basic example appears - first. - -Added two new statements: prepush and postpop. These are code blocks that - are written out during call and return statements. The prepush code is - written immediately before pushing the current state to the state stack - during a call. The postpop code is written immediately after popping the - current state during return. These can be used to implement a dynamically - resizable stack. - -Ragel 5.23 - Jul 24, 2007 -========================= - -Eliminated the use of callcc as an alternative to goto. Instead, the named - breaks implementation used in the Java code generator is imitated using - control flow variables. - -Improved the error message given when there is a write statement but no - machine instantiations and hence no state machine. - -Documentation improvements: updates to "Machine Instantiation", "Write Init" - and "Write Exports" sections. Added the "Variables Used by Ragel" section. - -Renamed "Entering Actions" to "Starting Actions." - -Other documentation updates. 
- -Ragel 5.22 - June 14, 2007 -========================== - -Bug fix: need to isolate the start state of a scanner before setting the - to-state and from-state actions which clear and set tokstart. This affected - very simple scanners only. Most scanners have an isolated start state due to - the pattern structure. - -Bug fix: when -S or -M was given the ragel version number was not emitted, - causing the backend to reject the intermediate format. From Tim Potter. - -The p variable is now set up at the beginning of a scanner action, rather - than at the end. This leaves scanner actions free to manipulate p and - removes the need for the special holdTE and execTE (TE for tokend) versions - of hold and exec. It also removes the need to set p = tokend-1 immediately - before any control flow. We lose the ability to determine where in the - input stream a scanner action is executed, however this information is of - little use because it is primarily an artifact of the scanner implementation - (sometimes the last char, other times later on). The gains of this change - are consistency and simplicity. - -The "data" variable (used in Java and Ruby code generation only) can now be - overridden using the variable statement. - -Ragel 5.21 - May 9, 2007 -======================== - -Fixed an inconsistency in the value of p following an error. In the C - directly executable code (rlgen-cd -G2) p is left at the character where - the error occurred, which is correct. In all other code generators it was - left at the following character. This was fixed. Now in all code generators - p is left at the character where the error occurred. - -Bug fix: when fhold was used in scanner pattern actions which get executed - on the last character of the pattern (pattern matches which do not require - any lookahead), fhold was modifying p instead of tokend. This was fixed and - the patact.rl test was modified to cover the case. 
- -Fixed typos in the guide, improved the state action embedding operator - section and added subsections on the variable, import, and export - statements. - -Implemented a better solution than the pri hack for resolving the '-' - ambiguity: force a shortest match of term. - -Fixed bugs in the binary searching for condition keys in both the Ruby and - Java code generation. - -Can now embed the negative sense of a condition. Added a language- - independent test case for this feature and the necessary transformation - support. - -Added new condition embedding syntax: - expr inwhen cond - The transitions into the machine (starting transitions). - expr outwhen cond - The pending transitions out of the machine. - -The argument to the variable statement which affects the name of the current - state variable was changed from "curstate" to "cs" (the default name used - for the current state) - -Implemented the other variable names in the variable statement. Now all - variables (p, pe, cs, top, stack, act, tokstart, tokend) can be renamed. - -Parse errors in the intermediate XML file now cause the backend to exit - immediately rather than forge on. The recovery infrastructure isn't there - and segfaults are likely. - -When no input is given to the backend program, it should not print an error - message, it should just return a non-zero exit status. The assumption is - that the frontend printed an error. - -The version number is now included in the intermediate file. An error is - emitted if there is a mismatch. - -The alphabet type is now communicated from the frontend to the backend using - a one-word internal name instead of an array offset. - -The Ruby host language types had been just copied from Java. Reduced them to - two basic types: char and int, both signed with the usual C sizes. - -Ragel 5.20 - Apr 7, 2007 -======================== - -The cs variable is now always initialized, unless the "nocs" option is given - to the write init command. 
If there is no main machine, cs is initialized to - the entry point defined by the last machine instantiation. - -A number of fixes were made to the Ruby code generator. - -The frontend now scans ruby comments and regular expressions. - -A transformation for Ruby was added to the language-independent test suite. - The Ruby code generator passes on all the language-independent tests. - -A new Ruby test and two language-independent tests were added. - -Some portability fixes were made (Patches from Josef Goettgens and Aaron - Campbell). - -Fixed a make dependency bug which caused a problem for parallel building - (Patch from Jeremy Hinegardner). - -Ragel 5.19 - Mar 14, 2007 -========================= - -Added an import statement to ragel. This statement takes a literal string as - an argument, interprets it as a file name, then scrapes the file for - sequences of tokens that match the following forms. Tokens inside ragel - sections are ignored. An example is in test/import1.rl - name = number - name = lit_string - "define" name number - "define" name lit_string - -Added an export mechanism which writes defines for single character machines - that have been tagged with the export keyword in their definition. Defines - are used for C, ints for D, Java and Ruby. Examples of the export feature - are in test/export*.rl. - -All machine instantiations are now always generated, even if they are not - referenced. In the backend, entry points for all instantiations are written - out alongside start, error and first final states. - -If the main machine is not present then do not emit an error. Generate the - machine without a start state and do not initialize cs in the write init - code. - -Added an option -l to rlgen-cd which inhibits the writing of #line - directives. - -Added a new syntax for verbose embeddings. 
This adds parentheses: - $from(action_name); - Verbose embeddings without parentheses can make code difficult to read - because they force a space in the middle of an action embedding. There is a - tendency to associate spaces with concatenation. Without syntax - highlighting to make it clear that the embedding type is a keyword, the - problem is especially bad. The danger is that a verbose embedding could be - read as an embedding of the keyword representing the embedding type. With - parentheses, verbose embeddings read much more clearly. - -Conditions now have a forced order when more than one is executed on a - single character. Previously ordering relied on pointers, which caused - results to vary by compiler. Ordering is now done using condition action - declaration order. This fixes the failure of cond4.rl which occurred with - g++ 4.1 and other compiler versions. - -In the port from flex to ragel, the name separator :: in Ragel code was - lost. Added it back. - -In the examples directory switched from rlcodegen to rlgen-cd. Silenced a - warning in statechart.rl. - -In the root makefile the distclean target was fixed. It was calling clean in - the subdirs. In docs, the clean target was not deleting the new manpages for - the rlgen-* programs. Fixed. - -Portability and other fixes from Josef Goettgens were applied. - -The @datadir@ and @mandir@ variables are made use of in doc/Makefile.in for - specifying where documentation should be installed. Patch from Marcus - Rueckert. - -Ragel 5.18 - Feb 13, 2007 -========================= - -There is now a 100% correspondence between state id numbers in the - intermediate XML file, Graphviz dot files and generated code. This was - achieved by moving code which determines if the error state is necessary - into the frontend, and then assigning state numbers before writing out the - intermediate file. 
- -Backend class structure was reorganized to make it easier to add new code - generators without having to also modify the existing code generators. - -The C and D code generation executable was changed to rlgen-cd. - -The Java code generation was split out into its own executable (rlgen-java) - to allow it to freely diverge from the C/D-based code generation. - -The graphviz dot file generation was also split out to its own executable - (rlgen-dot). - -The Ruby code generation patch from Victor Hugo Borja was added. This is - highly experimental code and is not yet completely functional. It is in the - executable rlgen-ruby. - -The problem with large state machines in Java was fixed. This - problem was discovered by Colin Fleming, who also contributed a patch. - Rather than specify arrays as comma-separated lists of literals, array - initialization is now done in a static function. This is the approach used - by the Java compiler. Unlike the compiler Ragel is careful to split large - initialization functions. - -The manual was expanded and reorganized somewhat. - -Eliminated per-example directories in examples/. - -Made some fixes to the pullscan.rl example. - -In the frontend CR characters are now treated as whitespace. - -Updated to the latest aapl. This completely eliminates the shallowCopy - function. With that, a definitive memory leak is fixed. - -Control codes with escape sequences are now printable characters (-p - option). Also, the space character is now printed as SP. - -Fixed the null dereference and consequential segfault which occurred when - trying to create empty machines with [] and // and /a[]b/. - -Fixed the segfault which occurred when a machine reference failed. - -Discontinuing ragel.spec. It is more appropriate for this to be written by - package maintenance developers. 
- -Ragel 5.17 - Jan 28, 2007 -========================= - -The scanners and parsers in both the frontend and backend programs were - completely rewritten using Ragel and Kelbt. - -The '%when condition' syntax was functioning like '$when condition'. This - was fixed. - -In the Vim syntax file fixes to the matching of embedding operators were - made. Also, improvements to the sync patterns were made. - -Added pullscan.rl to the examples directory. It is an example of doing - pull-based scanning. Also, xmlscan.rl in rlcodegen is a pull scanner. - -The introduction chapter of the manual was improved. The manually-drawn - figures for the examples were replaced with graphviz-drawn figures. - -Ragel 5.16 - Nov 20, 2006 -========================= - -Policy change: the fhold and fexec directives did not function correctly in - scanner pattern actions. In this context manipulations of p may be lost or - made invalid. In the previous version of Ragel they were banned because of - this. Instead of banning these directives they have been fixed. The fexec - and fhold directives now manipulate tokend, which is now always used to - update p when the action terminates. - -Ragel 5.15 - Oct 31, 2006 -========================= - -A language independent test harness was introduced. Test cases can be - written using a custom mini-language in the embedded actions. This - mini-language is then translated to C, D and Java when generating the - language-specific test cases. - -Several existing tests have been ported to the language-independent format - and a number of new language-independent test cases have been added. - -The state-based embedding operators which access states that are not the - start state and are not final (the 'middle' states) have changed. They - were: - <@/ eof action into middle states - <@! 
error action into middle states - <@^ local error action into middle states - <@~ to-state action into middle states - <@* from-state action into middle states - They are now: - <>/ eof action into middle states - <>! error action into middle states - <>^ local error action into middle states - <>~ to-state action into middle states - <>* from-state action into middle states - -The verbose form of embeddings using the <- operator has been removed. - This syntax was difficult to remember. - -A new verbose form of state-based embedding operators has been added. - These are like the symbol versions, except they replace the symbols: - / ! ^ ~ * - with literal keywords: - eof err lerr to from - -The following words have been promoted to keywords: - when eof err lerr to from - -The write statement now gets its own lexical scope in the scanner to ensure - that commands are passed through as is (not affected by keywords). - -Bug fix: in the code generation of fret in scanner actions the adjustment to - p that is needed in some cases (dependent on content of patterns) was not - happening. - -The fhold directive, which decrements p, cannot be permitted in the pattern - action of a scanner item because it will not behave consistently. At the end - of a pattern action p could be decremented, set to a new value or left - alone. This depends on the contents of the scanner's patterns. The user - cannot be expected to predict what will happen to p. - -Conditions in D code require a cast to the widec type when computing widec. - -Like Java, D code also needs if (true) branches for control flow in actions - in order to fool the unreachable code detector. This is now abstracted in - all code generators using the CTRL_FLOW() function. - -The NULL_ITEM value in java code should be -1. This is needed for - maintaining tokstart. - -Ragel 5.14 - Oct 1, 2006 -======================== - -Fixed the check for use of fcall in actions embedded within longest match - items. 
It was emitting an error if an item's longest-match action had an - fcall, which is allowed. This bug was introduced while fixing a segfault in - version 5.8. - -A new minimization option was added: MinimizeMostOps (-l). This option - minimizes at every operation except on chains of expressions and chains of - terms (eg, union and concat). On these chains it minimizes only at the last - operation. This makes test cases with many states compile faster, without - killing the performance on grammars like strings2.rl. - -The -l minimiziation option was made the default. - -Fixes to Java code: Use of the fc value did not work, now fixed. Static data - is now declared with the final keyword. Patch from Colin Fleming. Conditions - now work when generating Java code. - -The option -p was added to rlcodegen which causes printable characters to be - printed in GraphViz output. Patch from Colin Fleming. - -The "element" keyword no longer exists, removed from vim syntax file. - Updated keyword highlighting. - -The host language selection is now made in the frontend. - -Native host language types are now used when specifying the alphtype. - Previously all languages used the set defined by C, and these were mapped to - the appropriate type in the backend. - -Ragel 5.13 - Sep 7, 2006 -======================== - -Fixed a careless error which broke Java code generation. - -Ragel 5.12 - Sep 7, 2006 -======================== - -The -o flag did not work in combination with -V. This was fixed. - -The split code generation format uses only the required number of digits - when writing out the number in the file name of each part. - -The -T0, -F0 and -G0 codegens should write out the action list iteration - variables only when there are regular, to state or from state actions. The - code gens should not use anyActions(). - -If two states have the same EOF actions, they are written out in the finish - routine as one case. 
- -The split and in-place goto formats would sometimes generate _out when it is - not needed. This was fixed. - -Improved the basic partitioning in the split code gen. The last partition - would sometimes be empty. This was fixed. - -Use of 'fcall *' was not causing top to be initialized. Fixed. - -Implemented a Java backend, specified with -J. Only the table-based format - is supported. - -Implemented range compression in the frontend. This has no effect on the - generated code, however it reduces the work of the backend and any programs - that read the intermediate format. - -Ragel 5.11 - Aug 10, 2006 -========================= - -Added a variable to the configure.in script which allows the building of - the parsers to be turned off (BUILD_PARSERS). Parser building is off by - default for released versions. - -Removed configure tests for bison defines header file. Use --defines=file - instead. - -Configure script doesn't test for bison, flex and gperf when building of the - parsers is turned off. - -Removed check for YYLTYPE structure from configure script. Since shipped - code will not build parsers by default, we don't need to be as accomodating - of other versions of bison. - -Added a missing include that showed up with g++ 2.95.3. - -Failed configure test for Objective-C compiler is now silent. - -Ragel 5.10 - Jul 31, 2006 -========================= - -Moved the check for error state higher in the table-based processing loop. - -Replaced naive implementations of condition searching with proper ones. In - the table-based formats the searching is also table-based. In the directly - executed formats the searching is also directly executable. - -The minimization process was made aware of conditions. - -A problem with the condition implementation was fixed. Previously we were - taking pointers to transitions and then using them after a call to - outTransCopy, which was a bad idea because they may be changed by the call. 
- -Added test mailbox3.rl which is based on mailbox2.rl but includes conditions - for restricting header and message body lengths. - -Eliminated the initial one-character backup of p just before resuming - execution. - -Added the -s option to the frontend for printing statistics. This currently - includes just the number of states. - -Sped up the generation of the in-place goto-driven (-G2) code style. - -Implemented a split version of in-place goto-driven code style. This code - generation style is suitable for producing fast implementations of very - large machines. Partitioning is currently naive. In the future a - high-quality partitioning program will be employed. The flag for accessing - this feature is -Pn, where n is the number of partitions. - -Converted mailbox1.rl, strings2.rl and cppscan1.rl tests to support the - split code generation. - -Fixes and updates were made to the runtests script: added -c for compiling - only, changed the -me option to -e, and added support for testing the split - code style. - -Ragel 5.9 - Jul 19, 2006 -======================== - -Fixed a bug in the include system which caused malformed output from the - frontend when the include was made from a multi-line machine spec and the - included file ended in a single line spec (or vice versa). - -Static data is now const. - -Actions which referenced states but were not embedded caused the frontend to - segfault, now fixed. - -Manual now built with pdflatex. - -The manual was reorganized and expanded. Chapter sequence is now: - Introduction, Constructing Machines, Embedding Actions, Controlling - Nondeterminism and Interfacing to the Host program. - -Ragel 5.8 - Jun 17, 2006 -======================== - -The internal representation of the alphabet type has been encapsulated - into a class and all operations on it have been defined as C++ operators. - -The condition implementation now supports range transitions. This allows - conditions to be embedded into arbitrary machines. 
Conditions are still - experimental. - -More condition embedding operators were added - 1. Isolate the start state and embed a condition into all transitions - leaving it: - >when cond OR >?cond - 2. Embed a condition into all transitions: - when cond OR $when cond OR $?cond - 3. Embed a condition into pending out transitions: - %when cond OR %?cond - -Improvements were made to the determinization process to support pending out - conditions. - -The Vim syntax file was fixed so that :> doesn't cause the match of a label. - -The test suite was converted to a single-file format which uses less disk - space than the old directory-per-test format. - -Ragel 5.7 - May 14, 2006 -======================== - -Conditions will not be embedded like actions because they involve a - manipulation of the state machine they are specified in. They have therefore - been taken out of the verbose action embedding form (using the <- compound - symbol). A new syntax for specifying conditions has been created: - m = '\n' when {i==4}; - -Fixed a bug which prevented state machine commands like fcurs, fcall, fret, - etc, from being accounted for in from-state actions and to-state actions. - This prevented some necessary support code from being generated. - -Implemented condition testing in remaining code generators. - -Configure script now checks for gperf, which is required for building. - -Added support for case-insensitive literal strings (in addition to regexes). - A case-insensitive string is made by appending an 'i' to the literal, as in - 'cmd'i or "cmd"i. - -Fixed a bug which caused all or expressions inside of all regular - expressions to be case-insensitive. For example /[fo]o bar/ would make the - [fo] part case-insensitive even though no 'i' was given following the - regular expression. - -Ragel 5.6 - Apr 1, 2006 -======================= - -Added a left-guarded concatenation operator. This operator <: is equivalent - to ( expr1 $1 . expr2 >0 ).
It is useful if you want to prefix a sequence - with a sequence of a subset of the characters it matches. For example, one - can consume leading whitespace before tokenizing a sequence of whitespace - separated words: ( ' '* <: ( ' '+ | [a-z]+ )** ) - -Removed context embedding code, which has been dead since 5.0. - -Ragel 5.5 - Mar 28, 2006 -======================== - -Implemented a case-insensitive option for regular expressions: /get/i. - -If no input file is given to the ragel program it reads from standard input. - -The label of the start state has been changed from START to IN to save on - required screen space. - -Bug fix: \0 was not working in literal strings, due to a change that reduced - memory usage by concatenating components of literal strings. Token data - length is now passed from the scanner to the parser so that we do not need to - rely on null termination. - -Ragel 5.4 - Mar 12, 2006 -======================== - -Eliminated the default transition from the frontend implementation. This - default transition was a space-saving optimization that at best could reduce - the number of allocated transitions by one half. Unfortunately it - complicated the implementation and this stood in the way of introducing - conditionals. The default transition may be reintroduced in the future. - -Added entry-guarded concatenation. This operator :>, is syntactic sugar - for expr1 $0 . expr >1. This operator terminates the matching of the first - machine when a first character of the second machine is matched. For - example in any* . ';' we never leave the any* machine. If we use any* :> ';' - then the any* machine is terminated upon matching the semi-colon. - -Added finish-guarded concatenation. This operator :>>, is syntactic sugar - for expr1 $0 . expr @1. This operator is like entry guarded concatenation - except the first machine is terminated when the second machine enters a - final state. This is useful for delaying the guard until a full pattern is - matched.
For example as in '/*' any* :>> '*/'. - -Added strong subtraction. Where regular subtraction removes from the first - machine any strings that are matched by the second machine, strong - subtraction removes any strings from the first that contain any strings of - the second as a substring. Strong subtraction is syntactic sugar for - expr1 - ( any* expr2 any* ). - -Eliminated the use of priorities from the examples. Replaced with - subtraction, guarded concatenation and longest-match kleene star. - -Did some initial work on supporting conditional transitions. Far from - complete and very buggy. This code will only be active when conditionals are - used. - -Ragel 5.3 - Jan 27, 2006 -======================== - -Added missing semi-colons that cause the build to fail when using older - versions of Bison. - -Fix for D code: if the contents of an fexec is a single word, the generated - code will get interpreted as a C-style cast. Adding two brackets prevents - this. Can now turn eliminate the "access this.;" in cppscan5 that was used to - get around this problem. - -Improved some of the tag names in the intermediate format. - -Added unsigned long to the list of supported alphabet types. - -Added ids of actions and action lists to XML intermediate format. Makes it - more human readable. - -Updated to latest Aapl package. - -Ragel 5.2 - Jan 6, 2006 -======================== - -Ragel emits an error if the target of fentry, fcall, fgoto or fnext is inside - a longest match operator, or if an action embedding in a longest match - machine uses fcall. The fcall command can still be used in pattern actions. - -Made improvements to the clang, rlscan, awkemu and cppscan examples. - -Some fixes to generated label names: they should all be prefixed with _. - -A fix to the Vim syntax highlighting script was made - -Many fixes and updates to the documentation. All important features and - concepts are now documented. A second chapter describing Ragel's use - was added. 
- -Ragel 5.1 - Dec 22, 2005 -======================== - -Fixes to the matching of section delimiters in Vim syntax file. - -If there is a longest match machine, the tokend var is now initialized by - write init. This is not necessary for correct functionality, however - prevents compiler warnings. - -The rlscan example was ported to the longest match operator and changed to - emit XML data. - -Fix to the error handling in the frontend: if there are errors in the lookup - of names at machine generation time then do not emit anything. - -If not compiling the full machine in the frontend (by using -M), avoid - errors and segfaults caused by names that are not part of the compiled - machine. - -Longest match bug fix: need to init tokstart when returning from fsm calls - that are inside longest match actions. - -In Graphviz drawing, the arrow into the start state is not a real - transition, do not draw to-state actions on the label. - -A bug fix to the handling of non-tag data within an XML tag was made. - -Backend exit value fixed: since the parser now accepts nothing so as to - avoid a redundant parse error when the frontend dies, we must force an - error. The backend should now be properly reporting errors. - -The longest match machine now has its start state set final. An LM machine - is in a final state when it has not matched anything, when it has matched - and accepted a token and is ready for another, and when it has matched a - token but is waiting for some lookahead before determining what to do about - it (similar to kleene star). - -Element statement removed from some tests. - -Entry point names are propagated to the backend and used to label the entry - point arrows in Graphviz output.
- -Ragel 5.0 - Dec 17, 2005 -======================== - (additional details in V5 release notes) - -Ragel has been split into two executables: A frontend which compiles - machines and emits them in an XML format, and a backend which generates code - or a Graphviz dot file from the XML input. The purpose of this split is to - allow Ragel to interface with other tools by means of the XML intermediate - format and to reduce complexity by strictly separating the previously - entangled phases. The intermediate format will provide a better platform - for inspecting compiled machines and for extending Ragel to support other host - languages. - -The host language interface has been reduced significantly. Ragel no longer - expects the machine to be implemented as a structure or class and does not - generate functions corresponding to initialization, execution and EOF. - Instead, Ragel just generates the code of these components, allowing all of - them to be placed in a single function if desired. The user specifies a - machine in the usual manner, then indicates at which place in the program - text the state machine code is to be generated. This is done using the write - statement. It is possible to specify to Ragel how it should access the - variables it needs (such as the current state) using the access statement. - -The host language embedding delimiters have been changed. Single line - machines start with '%%' and end at newline. Multiline machines start with - '%%{' and end with '}%%'. The machine name is given with the machine - statement at the very beginning of the specification. The purpose of this - change is to make it easier to separate Ragel code from the host language. This - will ease the addition of supported host languages. - -The structure and class parsing which was previously able to extract a - machine's name has been removed since this feature is dependent on the host - language and inhibits the move towards a more language-independent frontend.
- -The init, element and interface statements have been made obsolete by the - new host language interface and have been removed. - -The fexec action statement has been changed to take only the new position to - move to. This statement is more useful for moving backwards and reparsing - input than for specifying a whole new buffer entirely and has been shifted - to this new use. Giving it only one argument also simplifies the parsing of - host code embedded in a Ragel specification. This will ease the addition of - supported host languages. - -Introduced the fbreak statement, which allows one to stop processing data - immediately. The machine ends up in the state that the current transition - was to go to. The current character is not changed. - -Introduced the noend option for writing the execute code. This inhibits - checking if we have reached pe. The machine will run until it goes into the - error state or fbreak is hit. This allows one to parse null-terminated - strings without first computing the length. - -The execute code now breaks out of the processing loop when it moves into - the error state. Previously it would run until pe was hit. Breaking out - makes the noend option useful when an error is encountered and allows - user code to determine where in the input the error occurred. It also - eliminates needlessly iterating the input buffer. - -Introduced the noerror, nofinal and noprefix options for writing the machine - data. The first two inhibit the writing of the error state and the - first-final state should they not be needed. The noprefix eliminates the - prefixing of the data items with the machine name. - -Support for the D language has been added. This is specified in the backend - with the -D switch. - -Since the new host language interface has been reduced considerably, Ragel - no longer needs to distinguish between C-based languages.
Support for C, C++ - and Objective-C has been folded into one option in the backend: -C - -The code generator has been made independent of the languages that it - supports by pushing the language dependent apsects down into the lower - levels of the code generator. - -Many improvements to the longest match construction were made. It is no - longer considered experimental. A longest match machine must appear at the - top level of a machine instantiation. Since it does not generate a pure - state machine (it may need to backtrack), it cannot be used as an operand to - other operators. - -References to the current character and current state are now completely - banned in EOF actions. - -Ragel 4.2 - Sep 16, 2005 -======================== - (additional details in V4 release notes) - -Fixed a bug in the longest match operator. In some states it's possible that - we either match a token or match nothing at all. In these states we need to - consult the LmSwitch on error so it must be prepared to execute an error - handler. We therefore need to init act to this error value (which is zero). - We can compute if we need to do this and the code generator emits the - initialization only if necessary. - -Changed the definition of the token end of longest match actions. It now - points to one past the last token. This makes computing the token length - easier because you don't have to add one. The longest match variables token - start, action identifier and token end are now properly initialized in - generated code. They don't need to be initialized in the user's code. - -Implemented to-state and from-state actions. These actions are executed on - transitions into the state (after the in transition's actions) and on - transitions out of the state (before the out transition's actions). See V4 - release notes for more information. 
- -Since there are no longer any action embedding operators that embed both on - transitions and on EOF, any actions that exist in both places will be there - because the user has explicitly done so. Presuming this case is rare, and - with code duplication in the hands of the user, we therefore give the EOF - actions their own action switch in the finish() function. This is further - motivated by the fact that the best solution is to do the same for to-state - and from-state actions in the main loop. - -Longest match actions can now be specified using a named action. Since a - word following a longest match item conflicts with the concatenation of a - named machine, the => symbol must come immediately before a named action. - -The longest match operator permits action and machine definitions in the - middle of a longest match construction. These are parsed as if they came - before the machine definition they are contained in. Permitting action and - machine definitions in a longest match construction allows objects to be - defined closer to their use. - -The longest match operator can now handle longest match items with no - action, where previously Ragel segfaulted. - -Updated to Aapl post 2.12. - -Fixed a bug in epsilon transition name lookups. After doing a name lookup - the result was stored in the parse tree. This is wrong because if a machine - is used more than once, each time it may resolve to different targets, - however it will be stored in the same place. We now store name resolutions - in a separated data structure so that each walk of a parse tree uses the - name resolved during the corresponding walk in the name lookup pass. - -The operators used to embed context and actions into states have been - modified. The V4 release notes contain the full details. - -Added zlen builtin machine to represent the zero length machine. 
Eventually - the name "null" will be phased out in favour of zlen because it is unclear - whether null matches the zero length string or if it does not match any - string at all (as does the empty builtin). - -Added verbose versions of action, context and priority embedding. See the V4 - release notes for the full details. A small example: - machine <- all exec { foo(); } <- final eof act1 - -Bugfix for machines with epsilon ops, but no join operations. I had - wrongfully assumed that because epsilon ops can only increase connectivity, - that no states are ever merged and therefore a call to fillInStates() is not - necessary. In reality, epsilon transitions within one machine can induce the - merging of states. In the following, state 2 follows two paths on 'i': - main := 'h' -> i 'i h' i: 'i'; - -Changed the license of the guide from a custom "do not propagate modified - versions of this document" license to the GPL. - -Ragel 4.1 - Jun 26, 2005 -======================== - (additional details in V4 release notes) - -A bug in include processing was fixed. Surrounding code in an include file - was being passed through to the output when it should be ignored. Includes - are only for including portions of another machine into the current. This - went unnoticed because all tested includes were wrapped in #ifndef ... - #endif directives and so did not affect the compilation of the file making - the include. - -Fixes were made to Vim syntax highlighting file. - -Duplicate actions are now removed from action lists. - -The character-level negation operator ^ was added. This operator produces a - machine that matches single characters that are not matched by the machine - it is applied to. This unary prefix operator has the same precedence level - as !. - -The use of + to specify a positive literal number was discontinued. - -The parser now assigns the subtraction operator a higher precedence than - the negation of literal number.
- -Ragel 4.0 - May 26, 2005 -======================== - (additional details in V4 release notes) - -Operators now strictly embed into a machine either on a specific class of - characters or on EOF, but never both. This gives a cleaner association - between the operators and the physical state machine entities they operate - on. This change is made up of several parts: - 1. '%' operator embeds only into leaving characters. - 2. All global and local error operators only embed on error character - transitions, their action will not be triggered on EOF in non-final - states. - 3. EOF action embedding operators have been added for all classes of states - to make up for functionality removed from other operators. These are - >/ $/ @/ %/. - 4. Start transition operator '>' no longer implicitly embeds into leaving - transitions when start state is final. - -Ragel now emits warnings about the improper use of statements and values in - action code that is embedded as an EOF action. Warnings are emitted for fpc, - fc, fexec, fbuf and fblen. - -Added a longest match construction operator |* machine opt-action; ... *|. - This is for repetition where an ability to revert to a shorter, previously - matched item is required. This is the same behaviour as flex and re2c. The - longest match operator is not a pure FSM construction, it introduces - transitions that implicitly hold the current character or reset execution to - a previous location in the input. Use of this operator requires the caller - of the machine to occasionally hold onto data after a call to the execute - routine. Use of machines generated with this operator as the input to other - operators may have undefined results. See examples/cppscan for an example. - This is very experimental code. - -Action ids are only assigned to actions that are referenced in the final - constructed machine, preventing gaps in the action id sequence. Previously - an action id was assigned if the action was referenced during parsing.
- -Machine specifications now begin with %% and are followed with an optional - name and either a single Ragel statement or a sequence of statements - enclosed in {}. - -Ragel no longer generates the FSM's structure or class. It is up to the user - to declare the structure and to give it a variable named curs of type - integer. If the machine uses the call stack the user must also declare a - array of integers named stack and an integer variable named top. - -In the case of Objective-C, Ragel no longer generates the interface or - implementation directives, allowing the user to declare additional methods. - -If a machine specification does not have a name then Ragel tries to find a - name for it by first checking if the specification is inside a struct, class - or interface. If it is not then it uses the name of the previous machine - specification. If still no name is found then this is an error. - -Fsm specifications now persist in memory and statements accumulate. - -Ragel now has an include statement for including the statements of a machine - spec in another file (perhaps because it is the corresponding header file). - The include statement can also be used to draw in the statements of another - fsm spec in the current file. - -The fstack statement is now obsolete and has been removed. - -A new statement, simply 'interface;', indicates that ragel should generate - the machine's interface. If Ragel sees the main machine it generates the - code sections of the machine. Previously, the header portion was generated - if the (now removed) struct statement was found and code was generated if - any machine definition was found. - -Fixed a bug in the resolution of fsm name references in actions. The name - resolution code did not recurse into inline code items with children - (fgoto*, fcall*, fnext*, and fexec), causing a segfault at code generation - time. - -Cleaned up the code generators. 
FsmCodeGen was made into a virtual base - class allowing for the language/output-style specific classes to inherit - both a language specific and style-specific base class while retaining only - one copy of FsmCodeGen. Language specific output can now be moved into the - language specific code generators, requiring less duplication of code in the - language/output-style specific leaf classes. - -Fixed bugs in fcall* implementation of IpgGoto code generation. - -If the element type has not been defined Ragel now uses a constant version - of the alphtype, not the exact alphtype. In most cases the data pointer of - the execute routine should be const. A non-const element type can still be - defined with the element statement. - -The fc special value now uses getkey for retrieving the current char rather - than *_p, which is wrong if the element type is a structure. - -User guide converted to TeX and updated for new 4.0 syntax and semantics. - -Ragel 3.7 - Oct 31, 2004 -======================== - -Bug fix: unreferenced machine instantiations causing segfault due to name - tree and parse tree walk becoming out of synchronization. - -Rewrote representation of inline code blocks using a tree data structure. - This allows special keywords such as fbuf to be used as the operands of - other fsm commands. - -Documentation updates. - -When deciding whether or not to generate machine instantiations, search the - entire name tree beneath the instantiation for references, not just the - root. - -Removed stray ';' in keller2.rl - -Added fexec for restarting the machine with new buffer data (state stays the - same), fbuf for retrieving the start of the buf, and fblen for - retrieving the orig buffer length. - -Implemented test/cppscan2 using fexec. This allows token emitting and restart - to stay inside the execute routine, instead of leaving and re-entering on - every token. - -Changed examples/cppscan to use fexec and thereby go much faster.
- -Implemented flex and re2c versions of examples/cppscan. Ragel version - goes faster than flex version but not as fast as re2c version. - -Merged in Objective-C patch from Erich Ocean. - -Turned off syncing with stdio in C++ tests to make them go faster. - -Renamed C++ code generation classes with the Cpp Prefix instead of CC to make - them easier to read. - -In the finish function emit fbuf as 0 cast to a pointer to the element type - so its type is not interpreted as an integer. - -The number -128 underflows char alphabets on some architectures. Removed - uses of it in tests. - -Disabled the keller2 test because it causes problems on many architectures - due to its large size and compilation requirements. - -Ragel 3.6 - Jul 10, 2004 -======================== - -Many documentation updates. - -When resolving names, return a set of values so that a reference in an - action block that is embedded more than once won't report distinct entry - points that are actually the same. - -Implemented flat tables. Stores a linear array of indices into the - transition array and only a low and high key value. Faster than binary - searching for keys but not usable for large alphabets. - -Fixed bug in deleting of transitions leftover from conversion from bst to - list implementation of transitions. Other code cleanup. - -In table based output calculate the cost of using an index. Don't use if - cheaper. - -Changed fstate() value available in init and action code to fentry() to - reflect the fact that the values returned are intended to be used as targets - in fgoto, fnext and fcall statements. The returned state is not a unique - state representing the label. There can be any number of states representing - a label. - -Added keller2 test, C++ scanning tests and C++ scanning example. - -In table based output split up transitions into targets and actions. This - allows actions to be omitted. - -Broke the components of the state array into separate arrays.
Requires - adding some fields where they could previously be omitted, however allows - finer grained control over the sizes of items and an overall size reduction. - Also means that state numbers are not an offset into the state array but - instead a sequence of numbers, meaning the context array does not have any - wasted bits. - -Action lists and transition also have their types chosen to be the smallest - possible for accommodating the contained values. - -Changed curs state stored in fsm struct from _cs to curs. Keep fsm->curs == - -1 while in machine. Added tests curs1 and curs2. - -Implemented the notion of context. Context can be embedded in states using - >:, $:, @: and %: operators. These embed a named context into start states, - all states, non-start/non-final and final states. If the context is declared - using a context statement - context name; - then the context can be queried for any state using fsm_name_ctx_name(state) - in C code and fsm_name::ctx_name(state) in C++ code. This feature makes it - possible to determine what "part" of the machine is currently active. - -Fixed crash on machine generation of graphs with no final state. If there - is no reference to a final state in a join operation, don't generate one. - -Updated Vim syntax: added labels to inline code, added various C++ keywords. - Don't highlight name separations as labels. Added switch labels, improved - alphtype, element and getkey. - -Fixed line info in error reporting of bad epsilon trans. - -Fixed fstate() for tab code gen. - -Removed references to malloc.h. - -Ragel 3.5 - May 29, 2004 -======================== - -When parse errors occur, the partially generated output file is deleted and - a non-zero exit status is returned. - -Updated Vim syntax file. - -Implemented the setting of the element type that is passed to the execute - routine as well as method for specifying how ragel should retrieve the key - from the element type.
This lets ragel process arbitrary structures inside - of which is the key that is parsed. - element struct Element; - getkey fpc->character; - -The current state is now implemented with an int across all machines. This - simplifies working with current state variables. For example this allows a - call stack to be implemented in user code. - -Implemented a method for retrieving the current state, the target state, and - any named states. - fcurs -retrieve the current state - ftargs -retrieve the target state - fstate(name) -retrieve a named state. - -Implemented a mechanism for jumping to and calling to a state stored in a - variable. - fgoto *; -goto the state returned by the C/C++ expression. - fcall *; -call the state returned by the C/C++ expression. - -Implemented a mechanism for specifying the next state without immediately - transfering control there (any code following statement is executed). - fnext label; -set the state pointed to by label as the next state. - fnext *; -set the state returned by the C/C++ expression as the - next. - -Action references are determined from the final machine instead of during - the parse tree walk. Some actions can be referenced in the parse tree but not - show up in the final machine. Machine analysis is now done based on this new - computation. - -Named state lookup now employs a breadth-first search in the lookup and - allows the user to fully qualify names, making it possible to specify - jumps/calls into parts of the machine deep in the name hierarchy. Each part - of name (separated by ::) employs a breadth first search from it's starting - point. - -Name references now must always refer to a single state. Since references to - multiple states is not normally intended, it no longer happens - automatically. This frees the programmer from thinking about whether or not - a state reference is unique. It also avoids the added complexity of - determining when to merge the targets of multiple references. 
The effect of - references to multiple states can be explicitly created using the join - operator and epsilon transitions. - -M option was split into -S and -M. -S specifies the machine spec to generate - for graphviz output and dumping. -M specifies the machine definition or - instantiation. - -Machine function parameters are now prefixed with an underscore to - avoid the hiding of class members. - -Ragel 3.4 - May 8, 2004 -======================= - -Added the longest match kleene star operator **, which is synonymous - with ( ( ) $0 %1 ) *. - -Epsilon operators distinguish between leaving transitions (going to - another expression in a comma separated list) and non-leaving transitions. - Leaving actions and priorities are appropriately transferred. - -Relative priority of following ops changed to: - 1. Action/Priority - 2. Epsilon - 3. Label - If label is done first then the isolation of the start state in > operators - will cause the label to point to the old start state that doesn't have the - new action/priority. - -Merged >! and >~, @! and @~, %! and %~, and $! and $~ operators to have one - set of global error action operators (>!, @!, %! and $!) that are invoked on - error by unexpected characters as well as by unexpected EOF. - -Added the fpc keyword for use in action code. This is a pointer to the - current character. *fpc == fc. If an action is invoked on EOF then fpc == 0. - -Added >^, @^, %^, and $^ local error operators. Global error operators (>!, - @!, $!, and %!) cause actions to be invoked if the final machine fails. - Local error actions cause actions to be invoked if the current machine - fails. - -Changed error operators to mean embed global/local error actions in: - >! and >^ -the start state. - @! and @^ -states that are not the start state and are not final. - %! and %^ -final states. - $! and $^ -all states. - -Added >@! which is synonymous >! then @! - -Added >@^ which is synonymous >^ then @^ - -Added @%! which is synonymous @!
then %! - -Added @%^ which is synonymous >^ then @^ - -FsmGraph representation of transition lists was changed from a mapping of - alphabet key -> transition objects using a BST to simply a list of - transition objects. Since the transitions are no longer divided by - single/range, the fast finding of transition objects by key is no longer - required functionality and can be eliminated. This new implementation uses - the same amount of memory however causes less allocations. It also make more - sense for supporting error transitions with actions. Previously an error - transition was represented by a null value in the BST. - -Regular expression ranges are checked to ensure that lower <= upper. - -Added printf-like example. - -Added atoi2, erract2, and gotcallret to the test suite. - -Improved build test to support make -jN and simplified the compiling and - running of tests. - -Ragel 3.3 - Mar 7, 2004 -======================= - -Portability bug fixes were made. Minimum and maximum integer values are - now taken from the system. An alignment problem on 64bit systems - was fixed. - -Ragel 3.2 - Feb 28, 2004 -======================== - -Added a Vim syntax file. - -Eliminated length var from generated execute code in favour of an end - pointer. Using length requires two variables be read and written. Using an - end pointer requires one variable read and written and one read. Results in - more optimizable code. - -Minimization is now on by default. - -States are ordered in output by depth first search. - -Bug in minimization fixed. States were not being distinguished based on - error actions. - -Added null and empty builtin machines. - -Added EOF error action operators. These are >~, >@, $~, and %~. EOF error - operators embed actions to take if the EOF is seen and interpreted as an - error. 
The operators correspond to the following states: - -the start state - -any state with a transition to a final state - -any state with a transition out - -a final state - -Fixed bug in generation of unreferenced machine vars using -M. Unreferenced - vars don't have a name tree built underneath when starting from - instantiations. Need to instead build the name tree starting at the var. - -Calls, returns, holds and references to fc in out action code are now - handled for ipgoto output. - -Only actions referenced by an instantiated machine expression are put into - the action index and written out. - -Added rlscan, an example that lexes Ragel input. - -Ragel 3.1 - Feb 18, 2004 -======================== - -Duplicates in OR literals are removed and no longer cause an assertion - failure. - -Duplicate entry points used in goto and call statements are made into - deterministic entry points. - -Base FsmGraph code moved from aapl into ragel, as an increasing amount - of specialization is required. Too much time was spent attempting to - keep it as a general purpose template. - -FsmGraph code de-templatized and hierarchy squashed to a single class. - -Single transitions taken out of FsmGraph code. In the machine construction - stage, transitions are now implemented only with ranges and default - transitions. This reduces memory consumption, simplifies code and prevents - covered transitions. However it requires the automated selection of single - transitions to keep goto-driven code lean. - -Machine reduction completely rewritten to be in-place. As duplicate - transitions and actions are found and the machine is converted to a format - suitable for writing as C code or as GraphViz input, the memory allocated - for states and transitions is reused, instead of newly allocated. - -New reduction code consolidates ranges, selects a default transition, and - selects single transitions with the goal of joining ranges that are split by - any number of single characters.
- -Line directive changed from "# " to the more common format - "#line ". - -Operator :! changed to @!. This should have happened in last release. - -Added params example. - -Ragel 3.0 - Jan 22, 2004 -======================== - -Ragel now parses the contents of struct statements and action code. - -The keyword fc replaces the use of *p to reference the current character in - action code. - -Machine instantiations other than main are allowed. - -Call, jump and return statements are now available in action code. This - facility makes it possible to jump to an error handling machine, call a - sub-machine for parsing a field or to follow paths through a machine as - determined by arbitrary C code. - -Added labels to the language. Labels can be used anywhere in a machine - expression to define an entry point. Also references to machine definitions - cause the implicit creation of a label. - -Added epsilon transitions to the language. Epsilon operators may reference - labels in the current name scope resolved when join operators are evaluated - and at the root of the expression tree of machine assignment/instantiation. - -Added the comma operator, which joins machines together without drawing any - transitions between them. This operator is useful in combination with - labels, the epsilon operator and user code transitions for defining machines - using the named state and transition list paradigm. It is also useful for - invoking transitions based on some analysis of the input or on the - environment. - -Added >!, :!, $!, %! operators for specifying actions to take should the - machine fail. These operators embed actions to execute if the machine - fails in - -the start state - -any state with a transition to a final state - -any state with a transiion out - -a final state - The general rule is that if an action embedding operator embeds an action - into a set of transitions T, then the error-counterpart with a ! 
embeds an - action into the error transition taken when any transition T is a candidate, - but does not match the input. - -The finishing augmentation operator ':' has been changed to '@'. This - frees the ':' symbol for machine labels and avoids hacks to the parser to - allow the use of ':' for both labels and finishing augmentations. The best - hack required that label names be distinct from machine definition names as - in main := word : word; This restriction is not good because labels are - local to the machine that they are used in whereas machine names are global - entities. Label name choices should not be restricted by the set of names - that are in use for machines. - -Named priority syntax now requires parenthesis surrounding the name and - value pair. This avoids grammar ambiguities now that the ',' operator has - been introduced and makes it more clear that the name and value are an - asscociated pair. - -Backslashes are escaped in line directive paths. - -Ragel 2.2 - Oct 6, 2003 -======================= - -Added {n}, {,n}, {n,} {n,m} repetition operators. - {n} -- exactly n repetitions - {,n} -- zero to n repetitions - {n,} -- n or more repetitions - {n,m} -- n to m repetitions - -Bug in binary search table in Aapl fixed. Fixes crashing on machines that - add to action tables that are implicitly shared among transitions. - -Tests using obsolete minimization algorithms are no longer built and run by - default. - -Added atoi and concurrent from examples to the test suite. - -Ragel 2.1 - Sep 22, 2003 -======================== - -Bug in priority comparison code fixed. Segfaulted on some input with many - embedded priorities. - -Added two new examples. - -Ragel 2.0 - Sep 7, 2003 -======================= - -Optional (?), One or More (+) and Kleene Star (*) operators changed from - prefix to postfix. Rationale is that postfix version is far more common in - regular expression implementations and will be more readily understood. 
- -All priority values attached to transitions are now accompanied by a name. - Transitions no longer have default priority values of zero assigned - to them. Only transitions that have different priority values assigned - to the same name influence the NFA-DFA conversion. This scheme reduces - side-effects of priorities. - -Removed the %! statement for unsetting pending out priorities. With - named priorities, it is not necessary to clear the priorities of a - machine with $0 %! because non-colliding names can be used to avoid - side-effects. - -Removed the clear keyword, which was for removing actions from a machine. - Not required functionality and it is non-intuitive to have a language - feature that undoes previous definitions. - -Removed the ^ modifier to repetition and concatenation operators. This - undocumented feature prevented out transitions and out priorities from being - transferred from final states to transitions leaving machines. Not required - functionality and complicates the language unnecessarily. - -Keyword 'func' changed to 'action' as a part of the phasing out of the term - 'function' in favour of 'action'. Rationale is that the term 'function' - implies that the code is called like a C function, which is not necessarily - the case. The term 'action' is far more common in state machine compiler - implementations. - -Added the instantiation statement, which looks like a standard variable - assignment except := is used instead of =. Instantiations go into the - same graph dictionary as definitions. In the future, instantiations - will be used as the target for gotos and calls in action code. - -The main graph should now be explicitly instantiated. If it is not, - a warning is issued. - -Or literal basic machines ([] outside of regular expressions) now support - negation and ranges. - -C and C++ interfaces lowercased. In the C interface an underscore now - separates the fsm machine and the function name.
Rationale is that lowercased - library and generated routines are more common. - C output: - int fsm_init( struct clang *fsm ); - int fsm_execute( struct clang *fsm, char *data, int dlen ); - int fsm_finish( struct clang *fsm ); - C++ output: - int fsm::init( ); - int fsm::execute( char *data, int dlen ); - int fsm::finish( ); - -Init, execute and finish all return -1 if the machine is in the error state - and can never accept, 0 if the machine is in a non-accepting state that has a - path to a final state and 1 if the machine is in an accepting state. - -Accept routine eliminated. Determining whether or not the machine accepts is - done by examining the return value of the finish routine. - -In C output, fsm structure is no longer a typedef, so referencing requires - the struct keyword. This is to stay in line with C language conventions. - -In C++ output, constructor is no longer written by ragel. As a consequence, - init routine is not called automatically. Allows constructor to be supplied - by user as well as the return value of init to be examined without calling it - twice. - -Static start state and private structures are taken out of C++ classes. - -Ragel 1.5.4 - Jul 14, 2003 -========================== - -Workaround for building with bison 1.875, which produces an - optimization that doesn't build with newer version gcc. - -Ragel 1.5.3 - Jul 10, 2003 -========================== - -Fixed building with versions of flex that recognize YY_NO_UNPUT. - -Fixed version numbers in ragel.spec file. - -Ragel 1.5.2 - Jul 7, 2003 -========================= - -Transition actions and out actions displayed in the graphviz output. - -Transitions on negative numbers handled in graphviz output. - -Warning generated when using bison 1.875 now squashed. - -Ragel 1.5.1 - Jun 21, 2003 -========================== - -Bugs fixed: Don't delete the output objects when writing to standard out. - Copy mem into parser buffer with memcpy, not strcpy. Fixes buffer mem errror. 
- -Fixes for compiling with Sun WorkShop 6 compilers. - -Ragel 1.5.0 - Jun 10, 2003 -========================== - -Line directives written to the output so that errors in the action code - are properly reported in the ragel input file. - -Simple graphviz dot file output format is supported. Shows states and - transitions. Does not yet show actions. - -Options -p and -f dropped in favour of -d output format. - -Added option -M for specifying the machine to dump with -d or the graph to - generate with -V. - -Error recovery implemented. - -Proper line and column number tracking implemented in the scanner. - -All action/function code is now embedded in the main Execute routine. Avoids - duplication of action code in the Finish routine and the need to call - ExecFuncs which resulted in huge code bloat. Will also allow actions to - modify cs when fsm goto, call and return is supported in action code. - -Fsm spec can have no statements, nothing will be generated. - -Bug fix: Don't accept ] as the opening of a .-. range a reg exp. - -Regular expression or set ranges (ie /[0-9]/) are now handled by the parser - and consequently must be well-formed. The following now generates a parser - error: /[+-]/ and must be rewritten as /[+\-]/. Also fixes a bug whereby ] - might be accepted as the opening of a .-. range causing /[0-9]-[0-9]/ to - parse incorrectly. - -\v, \f, and \r are now treated as whitespace in an fsm spec. - -Ragel 1.4.1 - Nov 19, 2002 -========================== - -Compile fixes. The last release (integer alphabets) was so exciting - that usual portability checks got bypassed. - -Ragel 1.4.0 - Nov 19, 2002 -========================== - -Arbitrary integer alphabets are now fully supported! A new language - construct: - 'alphtype ' added for specifying the type of the alphabet. Default - is 'char'. 
Possible alphabet types are: - char, unsigned char, short, unsigned short, int, unsigned int - -Literal machines specified in decimal format can now be negative when the - alphabet is a signed type. - -Literal machines (strings, decimal and hex) have their values checked for - overflow/underflow against the size of the alphabet type. - -Table driven and goto driven output redesigned to support ranges. Table - driven uses a binary search for locating single characters and ranges. Goto - driven uses a switch statement for single characters and nested if blocks for - ranges. - -Switch driven output removed due to a lack of consistent advantages. Most of - the time the switch driven FSM is of no use because the goto FSM makes - smaller and faster code. Under certain circumstances it can produce smaller - code than a goto driven fsm and be almost as fast, but some sporadic case - does not warrant maintaining it. - -Many warnings changed to errors. - -Added option -p for printing the final fsm before minimization. This lets - priorities be seen. Priorties are all reset to 0 before minimization. The - exiting option -f prints the final fsm after minimization. - -Fixed a bug in the clang test and example that resulted in redundant actions - being executed. - -Ragel 1.3.4 - Nov 6, 2002 -========================= - -Fixes to Chapter 1 of the guide. - -Brought back the examples and made them current. - -MSVC is no longer supported for compiling windows binaries because its - support for the C++ standard is frustratingly inadequate, it will cost money - to upgrade if it ever gets better, and MinGW is a much better alternative. - -The build system now supports the --host= option for building ragel - for another system (used for cross compiling a windows binary with MinGW). - -Various design changes and fixes towards the goal of arbitrary integer - alphabets and the handling of larger state machines were made. 
- -The new shared vector class is now used for action lists in transitions and - states to reduce memory allocations. - -An avl tree is now used for the reduction of transitions and functions of an - fsm graph before making the final machine. The tree allows better scalability - and performance by not requiring consecutively larger heap allocations. - -Final stages in the separation of fsm graph code from action embedding and - priority assignment is complete. Makes the base graph leaner and easier to reuse - in other projects (like Keller). - -Ragel 1.3.3 - Oct 22, 2002 -========================== - -More diagrams were added to section 1.7.1 of the user guide. - -FSM Graph code was reworked to separate the regex/nfa/minimization graph - algorithms from the manipulation of state and transition properties. - -An rpm spec file from Cris Bailiff was added. This allows an rpm for ragel - to be built with the command 'rpm -ta ragel-x.x.x.tar.gz' - -Fixes to the build system and corresponding doc updates in the README. - -Removed autil and included the one needed source file directly in the top - level ragel directory. - -Fixed a bug that nullified the 20 times speedup in large compilations - claimed by the last version. - -Removed awk from the doc build (it was added with the last release -- though - not mentioned in the changelog). - -Install of man page was moved to the doc dir. The install also installs the - user guide to $(PREFIX)/share/doc/ragel/ - -Ragel 1.3.2 - Oct 16, 2002 -========================== - -Added option -v (or --version) to show version information. - -The subtract operator no longer removes transition data from the machine - being subtracted. This is left up to the user for the purpose of making it - possible to transfer transitions using subtract and also for speeding up the - subtract routine. Note that it is possible to explicitly clear transition - data before doing a subtract. - -Rather severe typo bug fixed.
Bug was related to transitions with higher - priorities taking precedence. A wrong ptr was being returned. It appears to - have worked most of the time because the old ptr was deleted and the new one - allocated immediately after so the old ptr often pointed to the same space. - Just luck though. - -Bug in the removing of dead end paths was fixed. If the start state - has in transitions then those paths were not followed when finding states to - keep. Would result in non-dead end states being removed from the graph. - -In lists and in ranges are no longer maintained as a bst with the key as the - alphabet character and the value as a list of transitions coming in on that - char. There is one list for each of inList, inRange and inDefault. Now that - the required functionality of the graph is well known it is safe to remove - these lists to gain in speed and footprint. They shouldn't be needed. - -IsolateStartState() runs on modification of start data only if the start - state is not already isolated, which is now possible with the new in list - representation. - -Concat, Or and Star operators now use an approximation to - removeUnreachableStates that does not require a traversal of the entire - graph. This combined with an 'on-the-fly' management of final bits and final - state status results in a dramatic speed increase when compiling machines - that use those operators heavily. The strings2 test goes 20 times faster. - -Before the final minimization, after all fsm operations are complete, - priority data is reset which enables better minimization in cases where - priorities would otherwise separate similar states. - -Ragel 1.3.1 - Oct 2, 2002 -========================= - -Range transitions are now used to implement machines made with /[a-z]/ and - the .. operator as well as most of the builtin machines. The ranges are not - yet reflected in the output code, they are expanded as if they came from the - regular single transitions.
This is one step closer to arbitrary integer - output. - -The builtin machine 'any' was added. It is equiv to the builtin extend, - matching any characters. - -The builtin machine 'cntrl' now includes newline. - -The builtin machine 'space' now includes newline. - -The builtin machine 'ascii' is now the range 0-127, not all characters. - -A man page was written. - -A proper user guide was started. Chapter 1: Specifying Ragel Programs - was written. It even has some diagrams :) - -Ragel 1.3.0 - Sep 4, 2002 -========================= - -NULL keyword no longer used in table output. - -Though not yet in use, underlying graph structure changed to support range - transitions. As a result, most of the code that walks transition lists is now - implemented with an iterator that hides the complexity of the transition - lists and ranges. Range transitions will be used to implement /[a-z]/ style - machines and machines made with the .. operator. Previously a single - transition would be used for each char in the range, which is very costly. - Ranges eliminate much of the space complexity and allow for the .. operator - to be used with very large (integer) alphabets. - -New minimization similar to Hopcroft's alg. It does not require n^2 space and - runs close to O(n*log(n)) (an exact analysis of the alg is very hard). It is - much better than the stable and approx minimization and obsoletes them both. - An exact implementation of Hopcroft's alg is desirable but not possible - because the ragel implementation does not assume a finite alphabet, which - Hopcroft's requires. Ragel will support arbitrary integer alphabets which - must be treated as an infinite set for implementation considerations. - -New option -m using above described minimization to replace all previous - minimization options. Old options sill work but are obsolete and not - advertised with -h. - -Bug fixed in goto style output. The error exit set the current state to 0, - which is actually a valid state. 
If the machine was entered again it would go - into the first state, very wrong. If the first state happened to be final then - an immediate finish would accept when in fact it should fail. - -Slightly better fsm minimization now capable due to clearing of the - transition ordering numbers just prior to minimization. - -Ragel 1.2.2 - May 25, 2002 -========================== - -Configuration option --prefix now works when installing. - -cc file extension changed to cpp for better portability. - -Unlink of output file upon error no longer happens, removes dependency on - unlink system command. - -All multiline strings removed: not standard c++. - -Awk build dependency removed. - -MSVC 6.0 added to the list of supported compilers (with some tweaking of - bison and flex output). - -Ragel 1.2.1 - May 13, 2002 -========================== - -Automatic dependencies were fixed, they were not working correctly. - -Updated AUTHORS file to reflect contributors. - -Code is more C++ standards compliant: compiles with g++ 3.0 - -Fixed bugs that only showed up in g++ 3.0 - -Latest (unreleased) Aapl. - -Configuration script bails out if bison++ is installed. Ragel will not - compile with bison++ because it is coded in c++ and bison++ automatically - generates a c++ parser. Ragel uses a c-style bison parser. - -Ragel 1.2.0 - May 3, 2002 -========================= - -Underlying graph structure now supports default transitions. The result is - that a transition does not need to be made for each char of the alphabet - when making 'extend' or '/./' machines. Ragel compiles machines that - use the aforementioned primitives WAY faster. - -The ugly hacks needed to pick default transitions now go away due to - the graph supporting default transitions directly. - -If -e is given, but minimization is not turned on, print a warning. - -Makefiles use automatic dependencies. - -Ragel 1.1.0 - Apr 15, 2002 -========================== - -Added goto fsm: much faster than any other fsm style. 
- -Default operator (if two machines are side by side with no operator - between them) is concatenation. First showed up in 1.0.4. - -The fsm machine no longer automatically builds the flat table for - transition indices. Instead it keeps the key,ptr pair. In tabcodegen - the flat table is produced. This way very large alphabets with sparse - transitions will not consume large amounts of mem. This is also in prep - for fsm graph getting a default transition. - -Generated code contains a statement explicitly stating that ragel fsms - are NOT covered by the GPL. Technically, Ragel copies part of itself - to the output to make the generic fsm execution routine (for table driven - fsms only) and so the output could be considered under the GPL. But this - code is very trivial and could easily be rewritten. The actual fsm data - is subject to the copyright of the source. To promote the use of Ragel, - a special exception is made for the part of the output copied from Ragel: - it may be used without restriction. - -Much more elegant code generation scheme is employed. Code generation - class members need only put the 'codegen' keyword after their 'void' type - in order to be automatically registered to handle macros of the same name. - An awk script recognises this keyword and generates an appropriate driver. - -Ragel gets a test suite. - -Postfunc and prefunc go away because they are not supported by non - loop-driven fsms (goto, switch) and present duplicate functionality. - Universal funcs can be implemented by using $ operator. - -Automatic dependencies used in build system, no more make depend target. - -Code generation section in docs. - -Uses the latest aapl. - -Ragel 1.0.5 - Mar 3, 2002 -========================= - -Bugfix in SetErrorState that caused an assertion failure when compiling - simple machines that did not have full transition tables (and thus did - not show up on any example machines).
Assertion failure did not occur - when using the switch statement code as ragel does not call SetErrorState - in that case. - -Fixed some missing includes, now compiles on redhat. - -Moved the FsmMachTrans Compare class out of FsmMachTrans. Some compilers - don't deal with nested classes in templates too well. - -Removed old unused BASEREF in fsmgraph and ragel now compiles using - egcs-2.91.66 and presumably SUNWspro. The baseref is no longer needed - because states do not support being elements in multiple lists. I would - rather be able to support more compilers than have this feature. - -Started a README with compilation notes. Started an AUTHORS file. - -Started the user documentation. Describes basic machines and operators. - -Ragel 1.0.4 - Mar 1, 2002 -========================= - -Ported to the version of Aapl just after 2.2.0 release. See - http://www.ragel.ca/aapl/ for details on aapl. - -Fixed a bug in the clang example: the newline machine was not starred. - -Added explanations to the clang and mailbox examples. This should - help people that want to learn the language as the manual is far from - complete. - -Ragel 1.0.3 - Feb 2, 2002 -========================= - -Added aapl to the ragel tree. No longer requires you to download - and build aapl separately. Should avoid discouraging impatient users - from compiling ragel. - -Added the examples to the ragel tree. - -Added configure script checks for bison and flex. - -Fixed makefile so as not to die with newer versions of bison that - write the header of the parser to a .hh file. - -Started ChangeLog file. - -Ragel 1.0.2 - Jan 30, 2002 -========================== - -Bug fix in calculating highIndex for table based code. Was using - the length of out transition table rather than the value at the - end. - -If high/low index are at the limits, output a define in their place, - not the high/low values themselves so as not to cause compiler warnings.
- -If the resulting machines don't have any indices or functions, then - omit the empty unrefereced static arrays so as not to cause compiler - warnings about unused static vars. - -Fixed variable sized indices support. The header cannot have any - reference to INDEX_TYPE as that info is not known at the time the header - data is written. Forces us to use a void * for pointers to indices. In - the c++ versions we are forced to make much of the data non-member - static data in the code portion for the same reason. - -Ragel 1.0.1 - Jan 28, 2002 -========================== - -Exe name change from reglang to ragel. - -Added ftabcodegen output code style which uses a table for states and - transitions but uses a switch statement for the function execution. - -Reformatted options in usage dump to look better. - -Support escape sequences in [] sections of regular expressions. - -Ragel 1.0 - Jan 25, 2002 -======================== - -Initial release. diff --git a/ragel/Makefile.am b/ragel/Makefile.am index af35b265..47653d07 100644 --- a/ragel/Makefile.am +++ b/ragel/Makefile.am @@ -1,26 +1,23 @@ -COLM_BIN = ../colm/colm -COLM_WRAP = ../colm/colm-wrap -COLM_LA = ../colm/libcolm.la -COLM_LIBDEP = $(COLM_LA) -COLM_BINDEP = $(COLM_BIN) $(COLM_WRAP) -KELBT = @KELBT@ -RAGEL = @RAGEL@ - # libfsm contains only the FSM construction code and the backend code -# generators. It is useful for building code generators in programs not -# connected to the ragel language. +# generators for C, asm and cgil (Code Gen Intermediate Language) . It is +# useful for building state machine code generators in programs not connected +# to the ragel language. 
lib_LTLIBRARIES = libfsm.la pkginclude_HEADERS = \ - action.h fsmgraph.h ragel.h common.h \ + action.h fsmgraph.h common.h \ gendata.h redfsm.h dot.h # nodist_pkginclude_HEADERS = config.h -data_DATA = ril.lm rlhc-main.lm \ +data_DATA = $(CGIL_FILES) + +CGIL_FILES = ril.lm rlhc-main.lm \ rlhc-c.lm rlhc-csharp.lm rlhc-go.lm rlhc-js.lm rlhc-ruby.lm \ rlhc-crack.lm rlhc-d.lm rlhc-java.lm rlhc-julia.lm rlhc-ocaml.lm rlhc-rust.lm +EXTRA_DIST = $(CGIL_FILES) + # # libfsm: state machine construction and direct code generation. # @@ -55,18 +52,3 @@ if LINKER_NO_UNDEFINED libfsm_la_LDFLAGS += -Wl,--no-undefined endif -EXTRA_DIST = \ - $(RAGEL_LM) \ - rlscan.rl \ - rlparse.kh \ - rlparse.kl \ - ril.lm \ - rlhc-main.lm - -CLEANFILES = parse.c commit.cc rlhc.c - -RAGEL_LM = \ - rlparse.lm \ - ragel.lm \ - rlreduce.lm - diff --git a/ragel/load.cc b/ragel/load.cc deleted file mode 100644 index 47aee0d4..00000000 --- a/ragel/load.cc +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright 2015-2018 Adrian Thurston - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "load.h" -#include "ragel.h" -#include "inputdata.h" -#include "parsedata.h" -#include "parsetree.h" - -#include -#include -#include -#include - -using std::endl; -using std::ifstream; - -extern colm_sections rlparse_object; - -char *unescape( const char *s, int slen ) -{ - char *out = new char[slen+1]; - char *d = out; - - for ( int i = 0; i < slen; ) { - if ( s[i] == '\\' ) { - switch ( s[i+1] ) { - case '0': *d++ = '\0'; break; - case 'a': *d++ = '\a'; break; - case 'b': *d++ = '\b'; break; - case 't': *d++ = '\t'; break; - case 'n': *d++ = '\n'; break; - case 'v': *d++ = '\v'; break; - case 'f': *d++ = '\f'; break; - case 'r': *d++ = '\r'; break; - default: *d++ = s[i+1]; break; - } - i += 2; - } - else { - *d++ = s[i]; - i += 1; - } - } - *d = 0; - return out; -} - -char *unescape( const char *s ) -{ - return unescape( s, strlen(s) ); -} - -InputLoc::InputLoc( colm_location *pcloc ) -{ - if ( pcloc != 0 ) { - fileName = pcloc->name; - line = pcloc->line; - col = pcloc->column; - } - else { - fileName = 0; - line = -1; - col = -1; - } - - if ( fileName == 0 ) - fileName = "-"; -} diff --git a/ragel/longest.cc b/ragel/longest.cc deleted file mode 100644 index bf1b2a54..00000000 --- a/ragel/longest.cc +++ /dev/null @@ -1,571 +0,0 @@ -/* - * Copyright 2001-2018 Adrian Thurston - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * 
furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include - -/* Parsing. */ -#include "ragel.h" -#include "parsetree.h" -#include "parsedata.h" - -void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph ) -{ - graph->markReachableFromHereStopFinal( graph->startState ); - for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { - if ( ms->stateBits & STB_ISMARKED ) { - ms->lmItemSet.insert( 0 ); - ms->stateBits &= ~ STB_ISMARKED; - } - } - - /* Transfer the first item of non-empty lmAction tables to the item sets - * of the states that follow. Exclude states that have no transitions out. - * This must happen on a separate pass so that on each iteration of the - * next pass we have the item set entries from all lmAction tables. */ - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { - if ( trans->plain() ) { - TransDataAp *tdap = trans->tdap(); - if ( tdap->lmActionTable.length() > 0 ) { - LmActionTableEl *lmAct = tdap->lmActionTable.data; - StateAp *toState = tdap->toState; - assert( toState ); - - /* Can only optimize this if there are no transitions out. - * Note there can be out transitions going nowhere with - * actions and they too must inhibit this optimization. 
*/ - if ( toState->outList.length() > 0 ) { - /* Fill the item sets. */ - graph->markReachableFromHereStopFinal( toState ); - for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { - if ( ms->stateBits & STB_ISMARKED ) { - ms->lmItemSet.insert( lmAct->value ); - ms->stateBits &= ~ STB_ISMARKED; - } - } - } - } - } - else { - for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { - if ( cond->lmActionTable.length() > 0 ) { - - LmActionTableEl *lmAct = cond->lmActionTable.data; - StateAp *toState = cond->toState; - assert( toState ); - - /* Can only optimize this if there are no transitions out. - * Note there can be out transitions going nowhere with - * actions and they too must inhibit this optimization. */ - if ( toState->outList.length() > 0 ) { - /* Fill the item sets. */ - graph->markReachableFromHereStopFinal( toState ); - for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { - if ( ms->stateBits & STB_ISMARKED ) { - ms->lmItemSet.insert( lmAct->value ); - ms->stateBits &= ~ STB_ISMARKED; - } - } - } - } - } - } - } - } - - /* The lmItem sets are now filled, telling us which longest match rules - * can succeed in which states. First determine if we need to make sure - * act is defaulted to zero. We need to do this if there are any states - * with lmItemSet.length() > 1 and NULL is included. That is, that the - * switch may get called when in fact nothing has been matched. */ - int maxItemSetLength = 0; - graph->markReachableFromHereStopFinal( graph->startState ); - for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { - if ( ms->stateBits & STB_ISMARKED ) { - if ( ms->lmItemSet.length() > maxItemSetLength ) - maxItemSetLength = ms->lmItemSet.length(); - ms->stateBits &= ~ STB_ISMARKED; - } - } - - /* The actions executed on starting to match a token. 
*/ - FsmRes res = FsmAp::isolateStartState( graph ); - graph = res.fsm; - graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart ); - graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart ); - if ( maxItemSetLength > 1 ) { - /* The longest match action switch may be called when tokens are - * matched, in which case act must be initialized, there must be a - * case to handle the error, and the generated machine will require an - * error state. */ - lmSwitchHandlesError = true; - pd->fsmCtx->lmRequiresErrorState = true; - graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId ); - } - - /* The place to store transitions to restart. It maybe possible for the - * restarting to affect the searching through the graph that follows. For - * now take the safe route and save the list of transitions to restart - * until after all searching is done. */ - Vector restartData; - Vector restartCond; - - /* Set actions that do immediate token recognition, set the longest match part - * id and set the token ending. */ - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { - if ( trans->plain() ) { - TransDataAp *tdap = trans->tdap(); - if ( tdap->lmActionTable.length() > 0 ) { - LmActionTableEl *lmAct = tdap->lmActionTable.data; - StateAp *toState = tdap->toState; - assert( toState ); - - /* Can only optimize this if there are no transitions out. - * Note there can be out transitions going nowhere with - * actions and they too must inhibit this optimization. */ - if ( toState->outList.length() == 0 ) { - /* Can execute the immediate action for the longest match - * part. Redirect the action to the start state. - * - * NOTE: When we need to inhibit on_last due to leaving - * actions the above test suffices. 
If the state has out - * actions then it will fail because the out action will - * have been transferred to an error transition, which - * makes the outlist non-empty. */ - tdap->actionTable.setAction( lmAct->key, - lmAct->value->actOnLast ); - restartData.append( trans ); - } - else { - /* Look for non final states that have a non-empty item - * set. If these are present then we need to record the - * end of the token. Also Find the highest item set - * length reachable from here (excluding at transtions to - * final states). */ - bool nonFinalNonEmptyItemSet = false; - maxItemSetLength = 0; - graph->markReachableFromHereStopFinal( toState ); - for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { - if ( ms->stateBits & STB_ISMARKED ) { - if ( ms->lmItemSet.length() > 0 && !ms->isFinState() ) - nonFinalNonEmptyItemSet = true; - if ( ms->lmItemSet.length() > maxItemSetLength ) - maxItemSetLength = ms->lmItemSet.length(); - ms->stateBits &= ~ STB_ISMARKED; - } - } - - /* If there are reachable states that are not final and - * have non empty item sets or that have an item set - * length greater than one then we need to set tokend - * because the error action that matches the token will - * require it. */ - if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 ) - tdap->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd ); - - /* Some states may not know which longest match item to - * execute, must set it. */ - if ( maxItemSetLength > 1 ) { - /* There are transitions out, another match may come. */ - tdap->actionTable.setAction( lmAct->key, - lmAct->value->setActId ); - } - } - } - } - else { - for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { - if ( cond->lmActionTable.length() > 0 ) { - LmActionTableEl *lmAct = cond->lmActionTable.data; - StateAp *toState = cond->toState; - assert( toState ); - - /* Can only optimize this if there are no transitions out. 
- * Note there can be out transitions going nowhere with - * actions and they too must inhibit this optimization. */ - if ( toState->outList.length() == 0 ) { - /* Can execute the immediate action for the longest match - * part. Redirect the action to the start state. - * - * NOTE: When we need to inhibit on_last due to leaving - * actions the above test suffices. If the state has out - * actions then it will fail because the out action will - * have been transferred to an error transition, which - * makes the outlist non-empty. */ - cond->actionTable.setAction( lmAct->key, - lmAct->value->actOnLast ); - restartCond.append( cond ); - } - else { - /* Look for non final states that have a non-empty item - * set. If these are present then we need to record the - * end of the token. Also Find the highest item set - * length reachable from here (excluding at transtions to - * final states). */ - bool nonFinalNonEmptyItemSet = false; - maxItemSetLength = 0; - graph->markReachableFromHereStopFinal( toState ); - for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { - if ( ms->stateBits & STB_ISMARKED ) { - if ( ms->lmItemSet.length() > 0 && !ms->isFinState() ) - nonFinalNonEmptyItemSet = true; - if ( ms->lmItemSet.length() > maxItemSetLength ) - maxItemSetLength = ms->lmItemSet.length(); - ms->stateBits &= ~ STB_ISMARKED; - } - } - - /* If there are reachable states that are not final and - * have non empty item sets or that have an item set - * length greater than one then we need to set tokend - * because the error action that matches the token will - * require it. */ - if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 ) - cond->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd ); - - /* Some states may not know which longest match item to - * execute, must set it. */ - if ( maxItemSetLength > 1 ) { - /* There are transitions out, another match may come. 
*/ - cond->actionTable.setAction( lmAct->key, lmAct->value->setActId ); - } - } - } - } - } - } - } - - /* Now that all graph searching is done it certainly safe set the - * restarting. It may be safe above, however this must be verified. */ - for ( Vector::Iter pt = restartData; pt.lte(); pt++ ) - restart( graph, *pt ); - - for ( Vector::Iter pt = restartCond; pt.lte(); pt++ ) - restart( graph, *pt ); - - int lmErrActionOrd = pd->fsmCtx->curActionOrd++; - - /* Embed the error for recognizing a char. */ - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) { - if ( st->isFinState() ) { - /* On error execute the onActNext action, which knows that - * the last character of the token was one back and restart. */ - graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, - &st->lmItemSet[0]->actOnNext, 1 ); - st->eofActionTable.setAction( lmErrActionOrd, - st->lmItemSet[0]->actOnNext ); - st->eofTarget = graph->startState; - } - else { - graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, - &st->lmItemSet[0]->actLagBehind, 1 ); - st->eofActionTable.setAction( lmErrActionOrd, - st->lmItemSet[0]->actLagBehind ); - st->eofTarget = graph->startState; - } - } - else if ( st->lmItemSet.length() > 1 ) { - /* Need to use the select. Take note of which items the select - * is needed for so only the necessary actions are included. */ - for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) { - if ( *plmi != 0 ) - (*plmi)->inLmSelect = true; - } - /* On error, execute the action select and go to the start state. */ - graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, - &lmActSelect, 1 ); - st->eofActionTable.setAction( lmErrActionOrd, lmActSelect ); - st->eofTarget = graph->startState; - } - } - - /* Finally, the start state should be made final. 
*/ - graph->setFinState( graph->startState ); -} - -/* Build the individual machines, setting up the NFA transitions to final - * states as we go. This is the base, unoptimized configuration. Later on we - * look to eliminate NFA transitions. Return the union of all machines. */ -FsmRes LongestMatch::buildBaseNfa( ParseData *pd ) -{ - int nfaOrder = 1; - FsmAp **parts = new FsmAp*[longestMatchList->length()]; - - /* Make each part of the longest match. */ - LmPartList::Iter lmi = longestMatchList->last(); - for ( int i = longestMatchList->length() - 1; lmi.gtb(); lmi--, i-- ) { - /* Create the machine and embed the setting of the longest match id. */ - FsmRes res = lmi->join->walk( pd ); - if ( !res.success() ) - return res; - - parts[i] = res.fsm; - - StateSet origFin = parts[i]->finStateSet; - for ( StateSet::Iter fin = origFin; fin.lte(); fin++ ) { - StateAp *orig = *fin; - StateAp *newFinal = parts[i]->addState(); - - newFinal->lmNfaParts.insert( lmi ); - - NfaTrans *trans = new NfaTrans( nfaOrder++ ); - if ( orig->nfaOut == 0 ) - orig->nfaOut = new NfaTransList; - orig->nfaOut->append( trans ); - parts[i]->attachToNfa( orig, newFinal, trans ); - - if ( orig->outPriorTable.length() > 0 ) { - newFinal->outPriorTable.insert( orig->outPriorTable ); - orig->outPriorTable.empty(); - } - if ( orig->outActionTable.length() > 0 ) { - newFinal->outActionTable.insert( orig->outActionTable ); - orig->outActionTable.empty(); - } - if ( orig->outCondSpace != 0 ) { - newFinal->outCondSpace = orig->outCondSpace; - newFinal->outCondKeys.insert( orig->outCondKeys ); - orig->outCondSpace = 0; - orig->outCondKeys.empty(); - } - - parts[i]->unsetFinState( orig ); - parts[i]->setFinState( newFinal ); - } - } - - /* Union machines one and up with machine zero. The grammar dictates that - * there will always be at least one part. 
*/ - FsmRes fsm( FsmRes::Fsm(), parts[0] ); - for ( int i = 1; i < longestMatchList->length(); i++ ) { - fsm = FsmAp::unionOp( fsm, parts[i] ); - if ( !fsm.success() ) - return fsm; - } - - /* Create a new, isolated start state into which we can embed tokstart - * functions. */ - fsm = FsmAp::isolateStartState( fsm ); - if ( !fsm.success() ) - return fsm; - - fsm->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart ); - fsm->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart ); - - KeyOps *keyOps = pd->fsmCtx->keyOps; - - /* Draw the trasition back to the start state. */ - for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { - if ( st->lmNfaParts.length() > 0 ) { - assert( st->lmNfaParts.length() == 1 ); - - /*TransAp *newTrans = */fsm->attachNewTrans( st, - fsm->startState, keyOps->minKey, keyOps->maxKey ); - - fsm->transferOutData( st, st ); - if ( st->outCondSpace != 0 ) - FsmAp::embedCondition( fsm, st, st->outCondSpace->condSet, st->outCondKeys ); - - for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { - if ( trans->plain() ) - trans->tdap()->actionTable.setAction( pd->fsmCtx->curActionOrd++, st->lmNfaParts[0]->actNfaOnNext ); - else { - for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) - cond->actionTable.setAction( pd->fsmCtx->curActionOrd++, st->lmNfaParts[0]->actNfaOnNext ); - } - } - - st->eofActionTable.setAction( pd->fsmCtx->curActionOrd++, st->lmNfaParts[0]->actNfaOnEof ); - } - } - - delete[] parts; - return fsm; -} - -bool LongestMatch::matchCanFail( ParseData *pd, FsmAp *fsm, StateAp *st ) -{ - if ( st->outCondSpace != 0 ) - return true; - - return false; -} - - -void LongestMatch::eliminateNfaActions( ParseData *pd, FsmAp *fsm ) -{ - /* - * Once the union is complete we can optimize by advancing actions so they - * happen sooner, then draw the final transitions back to the start state. 
- * First step is to remove epsilon transitions that will never be taken. - */ - bool modified = true; - while ( modified ) { - modified = false; - - for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { - /* Check if the nfa parts list is non-empty (meaning we have a final - * state created for matching a pattern). */ - if ( st->lmNfaParts.length() > 0 && st->nfaIn != 0 ) { - /* Check if it can fail. If it can fail, then we cannot - * eliminate the prior candidates. If it can't fail then it is - * acceptable to eliminate the prior NFA transitions because we - * will never backtrack to follow them.*/ - if ( matchCanFail( pd, fsm, st ) ) - continue; - - for ( NfaInList::Iter in = *st->nfaIn; in.lte(); in++ ) { - StateAp *fromState = in->fromState; - /* Go forward until we get to the in-transition that cannot - * fail. Stop there because we are interested in what's - * before. */ - for ( NfaTransList::Iter to = *fromState->nfaOut; to.lte(); to++ ) { - if ( to->order < in->order ) { - /* Can nuke the epsilon transition that we will never - * follow. */ - fsm->detachFromNfa( fromState, to->toState, to ); - fromState->nfaOut->detach( to ); - delete to; - - modified = true; - goto restart; - } - } - } - } - } - - restart: {} - } -} - -bool LongestMatch::onlyOneNfa( ParseData *pd, FsmAp *fsm, StateAp *st, NfaTrans *in ) -{ - if ( st->nfaOut != 0 && st->nfaOut->length() == 1 && st->nfaOut->head == in ) - return true; - return false; -} - -/* Advance NFA actions to the final character of the pattern match. This only - * works when the machine cannot move forward more. */ -void LongestMatch::advanceNfaActions( ParseData *pd, FsmAp *fsm ) -{ - /* - * Advance actions to the final transition of the pattern match. - */ - for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { - /* IS OUT COND SPACE ALL? */ - if ( st->lmNfaParts.length() > 0 && st->nfaIn != 0 ) { - /* Only concern ourselves with final states that cannot fail. 
*/ - if ( matchCanFail( pd, fsm, st ) ) - continue; - - /* If there are any out actions we cannot advance because we need - * to execute on the following character. We canot move to on-last, - * but in the next pass maybe we can eliminate the NFA action and - * move on leaving. */ - if ( st->outActionTable.length() > 0 ) - continue; - - for ( NfaInList::Iter in = *st->nfaIn; in.lte(); in++ ) { - - StateAp *fromState = in->fromState; - if ( !fsm->anyRegularTransitions( fromState ) && - onlyOneNfa( pd, fsm, fromState, in ) ) - { - /* Can nuke. */ - for ( TransInList::Iter t = fromState->inTrans; t.lte(); t++ ) { - t->actionTable.setAction( pd->fsmCtx->curActionOrd++, - st->lmNfaParts[0]->actNfaOnLast ); - } - for ( CondInList::Iter t = fromState->inCond; t.lte(); t++ ) { - t->actionTable.setAction( pd->fsmCtx->curActionOrd++, - st->lmNfaParts[0]->actNfaOnLast ); - } - - fsm->moveInwardTrans( fsm->startState, fromState ); - } - } - } - } -} - - -FsmRes LongestMatch::mergeNfaStates( ParseData *pd, FsmAp *fsm ) -{ -again: - /* - * Advance actions to the final transition of the pattern match. - */ - for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { - /* IS OUT COND SPACE ALL? */ - if ( st->lmNfaParts.length() > 0 && st->nfaIn != 0 ) { - /* Only concern ourselves with final states that cannot fail. */ - if ( matchCanFail( pd, fsm, st ) ) - continue; - - for ( NfaInList::Iter in = *st->nfaIn; in.lte(); in++ ) { - - StateAp *fromState = in->fromState; - if ( !fsm->anyRegularTransitions( fromState ) && - onlyOneNfa( pd, fsm, fromState, in ) ) - { - /* Can apply the NFA transition, eliminating it. */ - FsmAp::applyNfaTrans( fsm, fromState, st, fromState->nfaOut->head ); - goto again; - } - } - } - } - - return FsmRes( FsmRes::Fsm(), fsm ); -} - -FsmRes LongestMatch::walkNfa( ParseData *pd ) -{ - /* The longest match has it's own name scope. */ - NameFrame nameFrame = pd->enterNameScope( true, 1 ); - - /* Build the machines. 
*/ - FsmRes fsm = buildBaseNfa( pd ); - if ( !fsm.success() ) - return fsm; - - /* Optimization passes. */ - eliminateNfaActions( pd, fsm ); - advanceNfaActions( pd, fsm ); - fsm = mergeNfaStates( pd, fsm ); - - /* Pop the name scope. */ - pd->popNameScope( nameFrame ); - - return fsm; -} diff --git a/ragel/main.cc b/ragel/main.cc deleted file mode 100644 index e9c5db39..00000000 --- a/ragel/main.cc +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright 2001-2018 Adrian Thurston - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "inputdata.h" - -extern struct colm_sections rlparseC; - -int main( int argc, const char **argv ) -{ - InputData id( &hostLangC, &rlparseC, 0 ); - return id.main( argc, argv ); -} diff --git a/ragel/parsedata.cc b/ragel/parsedata.cc deleted file mode 100644 index d3474684..00000000 --- a/ragel/parsedata.cc +++ /dev/null @@ -1,1490 +0,0 @@ -/* - * Copyright 2001-2018 Adrian Thurston - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include -#include -#include -#include - -#include "ragel.h" -#include "parsedata.h" -#include "parsetree.h" -#include "mergesort.h" -#include "version.h" -#include "inputdata.h" -#include - -using namespace std; - -const char mainMachine[] = "main"; - -void Token::_set( const char *str, int len ) -{ - length = len; - data = new char[len+1]; - memcpy( data, str, len ); - data[len] = 0; -} - -void Token::set( const char *str, int len, colm_location *cl ) -{ - _set( str, len ); - - if ( cl != 0 ) { - loc.fileName = cl->name; - loc.line = cl->line; - loc.col = cl->column; - } -} - -void Token::set( colm_data *cd, colm_location *cl ) -{ - set( cd->data, cd->length, cl ); -} - -void Token::set( const char *str, int len, const InputLoc &l ) -{ - _set( str, len ); - - loc.fileName = l.fileName; - loc.line = l.line; - loc.col = l.col; -} - -void Token::set( const char *str, int len, const ParserLoc &l ) -{ - _set( str, len ); - loc = l; -} - -void RedToken::set( colm_data *cd, colm_location *cl ) -{ - data = cd->data; - length = cd->length; - loc.fileName = cl->name; - loc.line = cl->line; - loc.col = cl->column; -} - -/* Count the transitions in the fsm by walking the state list. */ -int countTransitions( FsmAp *fsm ) -{ - int numTrans = 0; - StateAp *state = fsm->stateList.head; - while ( state != 0 ) { - numTrans += state->outList.length(); - state = state->next; - } - return numTrans; -} - -Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd ) -{ - /* Reset errno so we can check for overflow or underflow. In the event of - * an error, sets the return val to the upper or lower bound being tested - * against. 
*/ - errno = 0; - unsigned int size = pd->alphType->size; - bool unusedBits = size < sizeof(unsigned long); - - unsigned long ul = strtoul( str, 0, 16 ); - - if ( errno == ERANGE || ( unusedBits && ul >> (size * 8) ) ) { - pd->id->error(loc) << "literal " << str << " overflows the alphabet type" << endl; - ul = 1 << (size * 8); - } - - if ( unusedBits && pd->alphType->isSigned && ul >> (size * 8 - 1) ) - ul |= ( -1L >> (size*8) ) << (size*8); - - return Key( (long)ul ); -} - -Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd ) -{ - if ( pd->alphType->isSigned ) { - /* Convert the number to a decimal. First reset errno so we can check - * for overflow or underflow. */ - errno = 0; - long long minVal = pd->alphType->sMinVal; - long long maxVal = pd->alphType->sMaxVal; - - long long ll = strtoll( str, 0, 10 ); - - /* Check for underflow. */ - if ( ( errno == ERANGE && ll < 0 ) || ll < minVal ) { - pd->id->error(loc) << "literal " << str << " underflows the alphabet type" << endl; - ll = minVal; - } - /* Check for overflow. */ - else if ( ( errno == ERANGE && ll > 0 ) || ll > maxVal ) { - pd->id->error(loc) << "literal " << str << " overflows the alphabet type" << endl; - ll = maxVal; - } - - return Key( (long)ll ); - } - else { - /* Convert the number to a decimal. First reset errno so we can check - * for overflow or underflow. */ - errno = 0; - unsigned long long minVal = pd->alphType->uMinVal; - unsigned long long maxVal = pd->alphType->uMaxVal; - - unsigned long long ull = strtoull( str, 0, 10 ); - - /* Check for underflow. */ - if ( ( errno == ERANGE && ull < 0 ) || ull < minVal ) { - pd->id->error(loc) << "literal " << str << " underflows the alphabet type" << endl; - ull = minVal; - } - /* Check for overflow. 
*/ - else if ( ( errno == ERANGE && ull > 0 ) || ull > maxVal ) { - pd->id->error(loc) << "literal " << str << " overflows the alphabet type" << endl; - ull = maxVal; - } - - return Key( (unsigned long)ull ); - } -} - -/* Make an fsm key in int format (what the fsm graph uses) from an alphabet - * number returned by the parser. Validates that the number doesn't overflow - * the alphabet type. */ -Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd ) -{ - /* Switch on hex/decimal format. */ - if ( str[0] == '0' && str[1] == 'x' ) - return makeFsmKeyHex( str, loc, pd ); - else - return makeFsmKeyDec( str, loc, pd ); -} - -/* Make an fsm int format (what the fsm graph uses) from a single character. - * Performs proper conversion depending on signed/unsigned property of the - * alphabet. */ -Key makeFsmKeyChar( char c, ParseData *pd ) -{ - if ( pd->fsmCtx->keyOps->isSigned ) { - /* Copy from a char type. */ - return Key( c ); - } - else { - /* Copy from an unsigned byte type. */ - return Key( (unsigned char)c ); - } -} - -/* Make an fsm key array in int format (what the fsm graph uses) from a string - * of characters. Performs proper conversion depending on signed/unsigned - * property of the alphabet. */ -void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd ) -{ - if ( pd->fsmCtx->keyOps->isSigned ) { - /* Copy from a char star type. */ - char *src = data; - for ( int i = 0; i < len; i++ ) - result[i] = Key(src[i]); - } - else { - /* Copy from an unsigned byte ptr type. */ - unsigned char *src = (unsigned char*) data; - for ( int i = 0; i < len; i++ ) - result[i] = Key(src[i]); - } -} - -/* Like makeFsmKeyArray except the result has only unique keys. They ordering - * will be changed. */ -void makeFsmUniqueKeyArray( KeySet &result, const char *data, int len, - bool caseInsensitive, ParseData *pd ) -{ - /* Use a transitions list for getting unique keys. */ - if ( pd->fsmCtx->keyOps->isSigned ) { - /* Copy from a char star type. 
*/ - const char *src = data; - for ( int si = 0; si < len; si++ ) { - Key key( src[si] ); - result.insert( key ); - if ( caseInsensitive ) { - if ( key.isLower() ) - result.insert( key.toUpper() ); - else if ( key.isUpper() ) - result.insert( key.toLower() ); - } - } - } - else { - /* Copy from an unsigned byte ptr type. */ - const unsigned char *src = (unsigned char*) data; - for ( int si = 0; si < len; si++ ) { - Key key( src[si] ); - result.insert( key ); - if ( caseInsensitive ) { - if ( key.isLower() ) - result.insert( key.toUpper() ); - else if ( key.isUpper() ) - result.insert( key.toLower() ); - } - } - } -} - -/* Make a builtin type. Depends on the signed nature of the alphabet type. */ -FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd ) -{ - /* FsmAp created to return. */ - FsmAp *retFsm = 0; - bool isSigned = pd->fsmCtx->keyOps->isSigned; - - switch ( builtin ) { - case BT_Any: { - /* All characters. */ - retFsm = FsmAp::dotFsm( pd->fsmCtx ); - break; - } - case BT_Ascii: { - /* Ascii characters 0 to 127. */ - retFsm = FsmAp::rangeFsm( pd->fsmCtx, 0, 127 ); - break; - } - case BT_Extend: { - /* Ascii extended characters. This is the full byte range. Dependent - * on signed, vs no signed. If the alphabet is one byte then just use - * dot fsm. */ - if ( isSigned ) - retFsm = FsmAp::rangeFsm( pd->fsmCtx, -128, 127 ); - else - retFsm = FsmAp::rangeFsm( pd->fsmCtx, 0, 255 ); - break; - } - case BT_Alpha: { - /* Alpha [A-Za-z]. */ - FsmAp *upper = FsmAp::rangeFsm( pd->fsmCtx, 'A', 'Z' ); - FsmAp *lower = FsmAp::rangeFsm( pd->fsmCtx, 'a', 'z' ); - FsmRes res = FsmAp::unionOp( upper, lower ); - upper = res.fsm; - upper->minimizePartition2(); - retFsm = upper; - break; - } - case BT_Digit: { - /* Digits [0-9]. */ - retFsm = FsmAp::rangeFsm( pd->fsmCtx, '0', '9' ); - break; - } - case BT_Alnum: { - /* Alpha numerics [0-9A-Za-z]. 
*/ - FsmAp *digit = FsmAp::rangeFsm( pd->fsmCtx, '0', '9' ); - FsmAp *upper = FsmAp::rangeFsm( pd->fsmCtx, 'A', 'Z' ); - FsmAp *lower = FsmAp::rangeFsm( pd->fsmCtx, 'a', 'z' ); - FsmRes res1 = FsmAp::unionOp( digit, upper ); - digit = res1.fsm; - FsmRes res2 = FsmAp::unionOp( digit, lower ); - digit = res2.fsm; - digit->minimizePartition2(); - retFsm = digit; - break; - } - case BT_Lower: { - /* Lower case characters. */ - retFsm = FsmAp::rangeFsm( pd->fsmCtx, 'a', 'z' ); - break; - } - case BT_Upper: { - /* Upper case characters. */ - retFsm = FsmAp::rangeFsm( pd->fsmCtx, 'A', 'Z' ); - break; - } - case BT_Cntrl: { - /* Control characters. */ - FsmAp *cntrl = FsmAp::rangeFsm( pd->fsmCtx, 0, 31 ); - FsmAp *highChar = FsmAp::concatFsm( pd->fsmCtx, 127 ); - FsmRes res = FsmAp::unionOp( cntrl, highChar ); - cntrl = res.fsm; - cntrl->minimizePartition2(); - retFsm = cntrl; - break; - } - case BT_Graph: { - /* Graphical ascii characters [!-~]. */ - retFsm = FsmAp::rangeFsm( pd->fsmCtx, '!', '~' ); - break; - } - case BT_Print: { - /* Printable characters. Same as graph except includes space. */ - retFsm = FsmAp::rangeFsm( pd->fsmCtx, ' ', '~' ); - break; - } - case BT_Punct: { - /* Punctuation. */ - FsmAp *range1 = FsmAp::rangeFsm( pd->fsmCtx, '!', '/' ); - FsmAp *range2 = FsmAp::rangeFsm( pd->fsmCtx, ':', '@' ); - FsmAp *range3 = FsmAp::rangeFsm( pd->fsmCtx, '[', '`' ); - FsmAp *range4 = FsmAp::rangeFsm( pd->fsmCtx, '{', '~' ); - - FsmRes res1 = FsmAp::unionOp( range1, range2 ); - range1 = res1.fsm; - FsmRes res2 = FsmAp::unionOp( range1, range3 ); - range1 = res2.fsm; - FsmRes res3 = FsmAp::unionOp( range1, range4 ); - range1 = res3.fsm; - range1->minimizePartition2(); - retFsm = range1; - break; - } - case BT_Space: { - /* Whitespace: [\t\v\f\n\r ]. 
*/ - FsmAp *cntrl = FsmAp::rangeFsm( pd->fsmCtx, '\t', '\r' ); - FsmAp *space = FsmAp::concatFsm( pd->fsmCtx, ' ' ); - FsmRes res = FsmAp::unionOp( cntrl, space ); - cntrl = res.fsm; - cntrl->minimizePartition2(); - retFsm = cntrl; - break; - } - case BT_Xdigit: { - /* Hex digits [0-9A-Fa-f]. */ - FsmAp *digit = FsmAp::rangeFsm( pd->fsmCtx, '0', '9' ); - FsmAp *upper = FsmAp::rangeFsm( pd->fsmCtx, 'A', 'F' ); - FsmAp *lower = FsmAp::rangeFsm( pd->fsmCtx, 'a', 'f' ); - - FsmRes res1 = FsmAp::unionOp( digit, upper ); - digit = res1.fsm; - FsmRes res2 = FsmAp::unionOp( digit, lower ); - digit = res2.fsm; - - digit->minimizePartition2(); - retFsm = digit; - break; - } - case BT_Lambda: { - retFsm = FsmAp::lambdaFsm( pd->fsmCtx ); - break; - } - case BT_Empty: { - retFsm = FsmAp::emptyFsm( pd->fsmCtx ); - break; - }} - - return retFsm; -} - -/* Check if this name inst or any name inst below is referenced. */ -bool NameInst::anyRefsRec() -{ - if ( numRefs > 0 ) - return true; - - /* Recurse on children until true. */ - for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) { - if ( (*ch)->anyRefsRec() ) - return true; - } - - return false; -} - -NameInst::~NameInst() -{ - /* Recurse on the implicit final state and then all children. */ - if ( final != 0 ) - delete final; - for ( NameVect::Iter name = childVect; name.lte(); name++ ) - delete *name; -} - -/* - * ParseData - */ - -/* Initialize the structure that will collect info during the parse of a - * machine. */ -ParseData::ParseData( InputData *id, string sectionName, - int machineId, const InputLoc §ionLoc, const HostLang *hostLang, - MinimizeLevel minimizeLevel, MinimizeOpt minimizeOpt ) -: - sectionName(sectionName), - sectionGraph(0), - /* 0 is reserved for global error actions. 
*/ - nextLocalErrKey(1), - nextNameId(0), - alphTypeSet(false), - lowerNum(0), - upperNum(0), - id(id), - machineId(machineId), - sectionLoc(sectionLoc), - rootName(0), - exportsRootName(0), - nextEpsilonResolvedLink(0), - nextLongestMatchId(1), - nextRepId(1), - cgd(0) -{ - fsmCtx = new FsmCtx( id ); - - /* Initialize the dictionary of graphs. This is our symbol table. The - * initialization needs to be done on construction which happens at the - * beginning of a machine spec so any assignment operators can reference - * the builtins. */ - initGraphDict(); - -} - -/* Clean up the data collected during a parse. */ -ParseData::~ParseData() -{ - graphDict.empty(); - fsmCtx->actionList.empty(); - - if ( fsmCtx->nameIndex != 0 ) - delete[] fsmCtx->nameIndex; - - if ( rootName != 0 ) - delete rootName; - if ( exportsRootName != 0 ) - delete exportsRootName; - - delete fsmCtx; -} - -ifstream *InputData::tryOpenInclude( const char **pathChecks, long &found ) -{ - const char **check = pathChecks; - ifstream *inFile = new ifstream; - - while ( *check != 0 ) { - inFile->open( *check ); - if ( inFile->is_open() ) { - found = check - pathChecks; - return inFile; - } - - /* - * 03/26/2011 jg: - * Don't rely on sloppy runtime behaviour: reset the state of the stream explicitly. - * If inFile->open() fails, which happens when include dirs are tested, the fail bit - * is set by the runtime library. Currently the VS runtime library opens new files, - * but when it comes to reading it refuses to work. 
- */ - inFile->clear(); - - check += 1; - } - - found = -1; - delete inFile; - return 0; -} - -bool isAbsolutePath( const char *path ) -{ -#ifdef _WIN32 - return isalpha( path[0] ) && path[1] == ':' && path[2] == '\\'; -#else - return path[0] == '/'; -#endif -} - -#ifdef _WIN32 -#define PATH_SEP '\\' -#else -#define PATH_SEP '/' -#endif - - -const char **InputData::makeIncludePathChecks( const char *thisFileName, const char *data ) -{ - const char **checks = 0; - long nextCheck = 0; - int length = strlen(data); - - /* Absolute path? */ - if ( isAbsolutePath( data ) ) { - checks = new const char*[2]; - checks[nextCheck++] = data; - } - else { - checks = new const char*[2 + includePaths.length()]; - - /* Search from the the location of the current file. */ - const char *lastSlash = strrchr( thisFileName, PATH_SEP ); - if ( lastSlash == 0 ) - checks[nextCheck++] = data; - else { - long givenPathLen = (lastSlash - thisFileName) + 1; - long checklen = givenPathLen + length; - char *check = new char[checklen+1]; - memcpy( check, thisFileName, givenPathLen ); - memcpy( check+givenPathLen, data, length ); - check[checklen] = 0; - checks[nextCheck++] = check; - } - - /* Search from the include paths given on the command line. */ - for ( ArgsVector::Iter incp = includePaths; incp.lte(); incp++ ) { - long pathLen = strlen( *incp ); - long checkLen = pathLen + 1 + length; - char *check = new char[checkLen+1]; - memcpy( check, *incp, pathLen ); - check[pathLen] = PATH_SEP; - memcpy( check+pathLen+1, data, length ); - check[checkLen] = 0; - checks[nextCheck++] = check; - } - } - - checks[nextCheck] = 0; - return checks; -} - - -/* An approximate check for duplicate includes. Due to aliasing of files it's - * possible for duplicates to creep in. 
*/ -bool ParseData::duplicateInclude( const char *inclFileName, const char *inclSectionName ) -{ - for ( IncludeHistory::iterator hi = includeHistory.begin(); hi != includeHistory.end(); hi++ ) { - if ( strcmp( hi->fileName.c_str(), inclFileName ) == 0 && - strcmp( hi->sectionName.c_str(), inclSectionName ) == 0 ) - { - return true; - } - } - return false; -} - - -/* Make a name id in the current name instantiation scope if it is not - * already there. */ -NameInst *ParseData::addNameInst( const InputLoc &loc, std::string data, bool isLabel ) -{ - /* Create the name instantitaion object and insert it. */ - NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel ); - curNameInst->childVect.append( newNameInst ); - if ( !data.empty() ) { - NameMapEl *inDict = 0; - if ( curNameInst->children.insert( data, &inDict ) ) - inDict->value = new NameMapVal; - inDict->value->vals.append( newNameInst ); - } - return newNameInst; -} - -void ParseData::initNameWalk() -{ - curNameInst = rootName; - curNameChild = 0; -} - -void ParseData::initExportsNameWalk() -{ - curNameInst = exportsRootName; - curNameChild = 0; -} - -/* Goes into the next child scope. The number of the child is already set up. - * We need this for the syncronous name tree and parse tree walk to work - * properly. It is reset on entry into a scope and advanced on poping of a - * scope. A call to enterNameScope should be accompanied by a corresponding - * popNameScope. */ -NameFrame ParseData::enterNameScope( bool isLocal, int numScopes ) -{ - /* Save off the current data. */ - NameFrame retFrame; - retFrame.prevNameInst = curNameInst; - retFrame.prevNameChild = curNameChild; - retFrame.prevLocalScope = localNameScope; - - /* Enter into the new name scope. 
*/ - for ( int i = 0; i < numScopes; i++ ) { - curNameInst = curNameInst->childVect[curNameChild]; - curNameChild = 0; - } - - if ( isLocal ) - localNameScope = curNameInst; - - return retFrame; -} - -/* Return from a child scope to a parent. The parent info must be specified as - * an argument and is obtained from the corresponding call to enterNameScope. - * */ -void ParseData::popNameScope( const NameFrame &frame ) -{ - /* Pop the name scope. */ - curNameInst = frame.prevNameInst; - curNameChild = frame.prevNameChild+1; - localNameScope = frame.prevLocalScope; -} - -void ParseData::resetNameScope( const NameFrame &frame ) -{ - /* Pop the name scope. */ - curNameInst = frame.prevNameInst; - curNameChild = frame.prevNameChild; - localNameScope = frame.prevLocalScope; -} - - -void ParseData::unsetObsoleteEntries( FsmAp *graph ) -{ - /* Loop the reference names and increment the usage. Names that are no - * longer needed will be unset in graph. */ - for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) { - /* Get the name. */ - NameInst *name = *ref; - name->numUses += 1; - - /* If the name is no longer needed unset its corresponding entry. */ - if ( name->numUses == name->numRefs ) { - assert( graph->entryPoints.find( name->id ) != 0 ); - graph->unsetEntry( name->id ); - assert( graph->entryPoints.find( name->id ) == 0 ); - } - } -} - -NameSet ParseData::resolvePart( NameInst *refFrom, - const std::string &data, bool recLabelsOnly ) -{ - /* Queue needed for breadth-first search, load it with the start node. */ - NameInstList nameQueue; - nameQueue.append( refFrom ); - - NameSet result; - while ( nameQueue.length() > 0 ) { - /* Pull the next from location off the queue. */ - NameInst *from = nameQueue.detachFirst(); - - /* Look for the name. */ - NameMapEl *el = from->children.find( data ); - if ( el != 0 ) { - /* Record all instances of the name. 
*/ - for ( Vector::Iter low = el->value->vals; low.lte(); low++ ) - result.insert( *low ); - } - - /* Name not there, do breadth-first operation of appending all - * childrent to the processing queue. */ - for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) { - if ( !recLabelsOnly || (*name)->isLabel ) - nameQueue.append( *name ); - } - } - - /* Queue exhausted and name never found. */ - return result; -} - -void ParseData::resolveFrom( NameSet &result, NameInst *refFrom, - NameRef *nameRef, int namePos ) -{ - /* Look for the name in the owning scope of the factor with aug. */ - NameSet partResult = resolvePart( refFrom, nameRef->data[namePos], false ); - - /* If there are more parts to the name then continue on. */ - if ( ++namePos < nameRef->length() ) { - /* There are more components to the name, search using all the part - * results as the base. */ - for ( NameSet::Iter name = partResult; name.lte(); name++ ) - resolveFrom( result, *name, nameRef, namePos ); - } - else { - /* This is the last component, append the part results to the final - * results. */ - result.insert( partResult ); - } -} - -/* Write out a name reference. */ -ostream &operator<<( ostream &out, const NameRef &nameRef ) -{ - int pos = 0; - if ( nameRef[pos] == "" ) { - out << "::"; - pos += 1; - } - out << nameRef[pos++]; - for ( ; pos < nameRef.length(); pos++ ) - out << "::" << nameRef[pos]; - return out; -} - -ostream &operator<<( ostream &out, const NameInst &nameInst ) -{ - /* Count the number fully qualified name parts. */ - int numParents = 0; - NameInst *curParent = nameInst.parent; - while ( curParent != 0 ) { - numParents += 1; - curParent = curParent->parent; - } - - /* Make an array and fill it in. */ - curParent = nameInst.parent; - NameInst **parents = new NameInst*[numParents]; - for ( int p = numParents-1; p >= 0; p-- ) { - parents[p] = curParent; - curParent = curParent->parent; - } - - /* Write the parents out, skip the root. 
*/ - for ( int p = 1; p < numParents; p++ ) - out << "::" << ( !parents[p]->name.empty() ? parents[p]->name : "" ); - - /* Write the name and cleanup. */ - out << "::" << ( !nameInst.name.empty() ? nameInst.name : "" ); - delete[] parents; - return out; -} - -struct CmpNameInstLoc -{ - static int compare( const NameInst *ni1, const NameInst *ni2 ) - { - if ( ni1->loc.line < ni2->loc.line ) - return -1; - else if ( ni1->loc.line > ni2->loc.line ) - return 1; - else if ( ni1->loc.col < ni2->loc.col ) - return -1; - else if ( ni1->loc.col > ni2->loc.col ) - return 1; - return 0; - } -}; - -void ParseData::errorStateLabels( const NameSet &resolved ) -{ - MergeSort mergeSort; - mergeSort.sort( resolved.data, resolved.length() ); - for ( NameSet::Iter res = resolved; res.lte(); res++ ) - id->error((*res)->loc) << " -> " << **res << endl; -} - - -NameInst *ParseData::resolveStateRef( NameRef *nameRef, InputLoc &loc, Action *action ) -{ - NameInst *nameInst = 0; - - /* Do the local search if the name is not strictly a root level name - * search. */ - if ( nameRef->data[0] != "" ) { - /* If the action is referenced, resolve all of them. */ - if ( action != 0 && action->embedRoots.length() > 0 ) { - /* Look for the name in all referencing scopes. */ - NameSet resolved; - for ( NameInstVect::Iter actRef = action->embedRoots; actRef.lte(); actRef++ ) - resolveFrom( resolved, *actRef, nameRef, 0 ); - - if ( resolved.length() > 0 ) { - /* Take the first one. */ - nameInst = resolved[0]; - if ( resolved.length() > 1 ) { - /* Complain about the multiple references. */ - id->error(loc) << "state reference " << *nameRef << - " resolves to multiple entry points" << endl; - errorStateLabels( resolved ); - } - } - } - } - - /* If not found in the local scope, look in global. */ - if ( nameInst == 0 ) { - NameSet resolved; - int fromPos = nameRef->data[0] != "" ? 0 : 1; - resolveFrom( resolved, rootName, nameRef, fromPos ); - - if ( resolved.length() > 0 ) { - /* Take the first. 
*/ - nameInst = resolved[0]; - if ( resolved.length() > 1 ) { - /* Complain about the multiple references. */ - id->error(loc) << "state reference " << *nameRef << - " resolves to multiple entry points" << endl; - errorStateLabels( resolved ); - } - } - } - - if ( nameInst == 0 ) { - /* If not found then complain. */ - id->error(loc) << "could not resolve state reference " << *nameRef << endl; - } - return nameInst; -} - -void ParseData::resolveNameRefs( InlineList *inlineList, Action *action ) -{ - for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { - switch ( item->type ) { - case InlineItem::Entry: case InlineItem::Goto: - case InlineItem::Call: case InlineItem::Ncall: - case InlineItem::Next: { - /* Resolve, pass action for local search. */ - NameInst *target = resolveStateRef( item->nameRef, item->loc, action ); - - /* Name lookup error reporting is handled by resolveStateRef. */ - if ( target != 0 ) { - /* Check if the target goes into a longest match. */ - NameInst *search = target->parent; - while ( search != 0 ) { - if ( search->isLongestMatch ) { - id->error(item->loc) << "cannot enter inside a longest " - "match construction as an entry point" << endl; - break; - } - search = search->parent; - } - - /* Record the reference in the name. This will cause the - * entry point to survive to the end of the graph - * generating walk. */ - target->numRefs += 1; - } - - item->nameTarg = target; - break; - } - default: - break; - } - - /* Some of the item types may have children. */ - if ( item->children != 0 ) - resolveNameRefs( item->children, action ); - } -} - -/* Resolve references to labels in actions. */ -void ParseData::resolveActionNameRefs() -{ - for ( ActionList::Iter act = fsmCtx->actionList; act.lte(); act++ ) { - /* Only care about the actions that are referenced. */ - if ( act->embedRoots.length() > 0 ) - resolveNameRefs( act->inlineList, act ); - } -} - -/* Walk a name tree starting at from and fill the name index. 
*/ -void ParseData::fillNameIndex( NameInst *from ) -{ - /* Fill the value for from in the name index. */ - fsmCtx->nameIndex[from->id] = from; - - /* Recurse on the implicit final state and then all children. */ - if ( from->final != 0 ) - fillNameIndex( from->final ); - for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) - fillNameIndex( *name ); -} - -void ParseData::makeRootNames() -{ - /* Create the root name. */ - rootName = new NameInst( InputLoc(), 0, string(), nextNameId++, false ); - exportsRootName = new NameInst( InputLoc(), 0, string(), nextNameId++, false ); -} - -/* Build the name tree and supporting data structures. */ -void ParseData::makeNameTree( GraphDictEl *dictEl ) -{ - /* Set up curNameInst for the walk. */ - initNameWalk(); - - if ( dictEl != 0 ) { - /* A start location has been specified. */ - dictEl->value->makeNameTree( dictEl->loc, this ); - } - else { - /* First make the name tree. */ - for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) { - /* Recurse on the instance. */ - glel->value->makeNameTree( glel->loc, this ); - } - } - - /* The number of nodes in the tree can now be given by nextNameId */ - fsmCtx->nameIndex = new NameInst*[nextNameId]; - memset( fsmCtx->nameIndex, 0, sizeof(NameInst*)*nextNameId ); - fillNameIndex( rootName ); - fillNameIndex( exportsRootName ); -} - - -void ParseData::createBuiltin( const char *name, BuiltinMachine builtin ) -{ - Expression *expression = new Expression( builtin ); - Join *join = new Join( expression ); - MachineDef *machineDef = new MachineDef( join ); - VarDef *varDef = new VarDef( name, machineDef ); - GraphDictEl *graphDictEl = new GraphDictEl( name, varDef ); - graphDict.insert( graphDictEl ); -} - -/* Initialize the graph dict with builtin types. 
*/ -void ParseData::initGraphDict( ) -{ - createBuiltin( "any", BT_Any ); - createBuiltin( "ascii", BT_Ascii ); - createBuiltin( "extend", BT_Extend ); - createBuiltin( "alpha", BT_Alpha ); - createBuiltin( "digit", BT_Digit ); - createBuiltin( "alnum", BT_Alnum ); - createBuiltin( "lower", BT_Lower ); - createBuiltin( "upper", BT_Upper ); - createBuiltin( "cntrl", BT_Cntrl ); - createBuiltin( "graph", BT_Graph ); - createBuiltin( "print", BT_Print ); - createBuiltin( "punct", BT_Punct ); - createBuiltin( "space", BT_Space ); - createBuiltin( "xdigit", BT_Xdigit ); - createBuiltin( "null", BT_Lambda ); - createBuiltin( "zlen", BT_Lambda ); - createBuiltin( "empty", BT_Empty ); -} - -/* Set the alphabet type. If the types are not valid returns false. */ -bool ParseData::setAlphType( const InputLoc &loc, const HostLang *hostLang, const char *s1, const char *s2 ) -{ - alphTypeLoc = loc; - userAlphType = findAlphType( hostLang, s1, s2 ); - alphTypeSet = true; - return userAlphType != 0; -} - -/* Set the alphabet type. If the types are not valid returns false. 
*/ -bool ParseData::setAlphType( const InputLoc &loc, const HostLang *hostLang, const char *s1 ) -{ - alphTypeLoc = loc; - userAlphType = findAlphType( hostLang, s1 ); - alphTypeSet = true; - return userAlphType != 0; -} - -bool ParseData::setVariable( const char *var, InlineList *inlineList ) -{ - bool set = true; - - if ( strcmp( var, "p" ) == 0 ) - fsmCtx->pExpr = inlineList; - else if ( strcmp( var, "pe" ) == 0 ) - fsmCtx->peExpr = inlineList; - else if ( strcmp( var, "eof" ) == 0 ) - fsmCtx->eofExpr = inlineList; - else if ( strcmp( var, "cs" ) == 0 ) - fsmCtx->csExpr = inlineList; - else if ( strcmp( var, "data" ) == 0 ) - fsmCtx->dataExpr = inlineList; - else if ( strcmp( var, "top" ) == 0 ) - fsmCtx->topExpr = inlineList; - else if ( strcmp( var, "stack" ) == 0 ) - fsmCtx->stackExpr = inlineList; - else if ( strcmp( var, "act" ) == 0 ) - fsmCtx->actExpr = inlineList; - else if ( strcmp( var, "ts" ) == 0 ) - fsmCtx->tokstartExpr = inlineList; - else if ( strcmp( var, "te" ) == 0 ) - fsmCtx->tokendExpr = inlineList; - else - set = false; - - return set; -} - -/* Initialize the key operators object that will be referenced by all fsms - * created. */ -void ParseData::initKeyOps( const HostLang *hostLang ) -{ - /* Signedness and bounds. */ - alphType = alphTypeSet ? userAlphType : &hostLang->hostTypes[hostLang->defaultAlphType]; - fsmCtx->keyOps->setAlphType( hostLang, alphType ); - - if ( lowerNum != 0 ) { - /* If ranges are given then interpret the alphabet type. */ - fsmCtx->keyOps->minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this ); - fsmCtx->keyOps->maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this ); - } -} - -void ParseData::printNameInst( std::ostream &out, NameInst *nameInst, int level ) -{ - for ( int i = 0; i < level; i++ ) - out << " "; - out << (!nameInst->name.empty() ? 
nameInst->name : "") << - " id: " << nameInst->id << - " refs: " << nameInst->numRefs << - " uses: " << nameInst->numUses << endl; - for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ ) - printNameInst( out, *name, level+1 ); -} - -Action *ParseData::newLmCommonAction( const char *name, InlineList *inlineList ) -{ - InputLoc loc; - loc.line = 1; - loc.col = 1; - loc.fileName = "NONE"; - - Action *action = new Action( loc, name, inlineList, fsmCtx->nextCondId++ ); - action->embedRoots.append( rootName ); - fsmCtx->actionList.append( action ); - return action; -} - -void ParseData::initLongestMatchData() -{ - if ( lmList.length() > 0 ) { - /* The initTokStart action resets the token start. */ - InlineList *il1 = new InlineList; - il1->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - il1->head->children = new InlineList; - il1->head->children->append( new InlineItem( InputLoc(), - InlineItem::LmInitTokStart ) ); - initTokStart = newLmCommonAction( "initts", il1 ); - initTokStart->isLmAction = true; - - /* The initActId action gives act a default value. */ - InlineList *il4 = new InlineList; - il4->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - il4->head->children = new InlineList; - il4->head->children->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) ); - initActId = newLmCommonAction( "initact", il4 ); - initActId->isLmAction = true; - - /* The setTokStart action sets tokstart. */ - InlineList *il5 = new InlineList; - il5->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - il5->head->children = new InlineList; - il5->head->children->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) ); - setTokStart = newLmCommonAction( "ts", il5 ); - setTokStart->isLmAction = true; - - /* The setTokEnd action sets tokend. 
*/ - InlineList *il3 = new InlineList; - il3->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - il3->head->children = new InlineList; - il3->head->children->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) ); - setTokEnd = newLmCommonAction( "te", il3 ); - setTokEnd->isLmAction = true; - - /* The action will also need an ordering: ahead of all user action - * embeddings. */ - initTokStartOrd = fsmCtx->curActionOrd++; - initActIdOrd = fsmCtx->curActionOrd++; - setTokStartOrd = fsmCtx->curActionOrd++; - setTokEndOrd = fsmCtx->curActionOrd++; - } -} - -/* After building the graph, do some extra processing to ensure the runtime - * data of the longest mactch operators is consistent. We want tokstart to be - * null when no token match is active. */ -void ParseData::longestMatchInitTweaks( FsmAp *graph ) -{ - if ( lmList.length() > 0 ) { - /* Make sure all entry points (targets of fgoto, fcall, fnext, fentry) - * init the tokstart. */ - for ( EntryMap::Iter en = graph->entryPoints; en.lte(); en++ ) { - /* This is run after duplicates are removed, we must guard against - * inserting a duplicate. */ - ActionTable &actionTable = en->value->toStateActionTable; - if ( ! actionTable.hasAction( initTokStart ) ) { - /* We do this after the analysis pass, which reference counts - * the actions. Keep them up to date so we don't break the - * build. */ - initTokStart->numToStateRefs += 1; - actionTable.setAction( initTokStartOrd, initTokStart ); - } - } - - /* Find the set of states that are the target of transitions with - * actions that have calls. These states will be targeted by fret - * statements. 
*/ - StateSet states; - for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) { - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - if ( trans->plain() ) { - for ( ActionTable::Iter ati = trans->tdap()->actionTable; ati.lte(); ati++ ) { - if ( ati->value->anyCall && trans->tdap()->toState != 0 ) - states.insert( trans->tdap()->toState ); - } - } - else { - for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { - for ( ActionTable::Iter ati = cond->actionTable; ati.lte(); ati++ ) { - if ( ati->value->anyCall && cond->toState != 0 ) - states.insert( cond->toState ); - } - } - } - } - } - - - /* Init tokstart upon entering the above collected states. */ - for ( StateSet::Iter ps = states; ps.lte(); ps++ ) { - /* This is run after duplicates are removed, we must guard against - * inserting a duplicate. */ - ActionTable &actionTable = (*ps)->toStateActionTable; - if ( ! actionTable.hasAction( initTokStart ) ) { - /* We do this after the analysis pass, which reference counts - * the actions. Keep them up to date so we don't break the - * build. */ - initTokStart->numToStateRefs += 1; - actionTable.setAction( initTokStartOrd, initTokStart ); - } - } - } -} - -/* Always returns the breadth check result. Will not consume the fsm. 
*/ -BreadthResult *ParseData::checkBreadth( FsmAp *fsm ) -{ - double start = 0; - int minDepth = 0; - FsmAp::breadthFromEntry( start, minDepth, id->histogram, fsm, fsm->startState ); - - BreadthResult *breadth = new BreadthResult( start ); - - for ( Vector::Iter c = cuts; c.lte(); c++ ) { - for ( EntryMap::Iter mel = fsm->entryPoints; mel.lte(); mel++ ) { - if ( mel->key == c->entryId ) { - double cost = 0; - int minDepth = 0; - FsmAp::breadthFromEntry( cost, minDepth, id->histogram, fsm, mel->value ); - - breadth->costs.append( BreadthCost( c->name, cost ) ); - } - } - } - - return breadth; -} - - -static void resultWrite( ostream &out, long code, long id, const char *scode ) -{ - out << code << " " << id << " " << scode << endl; -} - -void ParseData::analysisResult( long code, long _id, const char *scode ) -{ - stringstream out; - resultWrite( out, code, _id, scode ); - id->comm = out.str(); -} - -void ParseData::reportBreadthResults( BreadthResult *breadth ) -{ - stringstream out; - - out << std::fixed << std::setprecision(10); - - out << "COST START " << - ( breadth->start ) << " " << - ( 1 ) << endl; - - for ( Vector::Iter c = breadth->costs; c.lte(); c++ ) { - out << "COST " << c->name << " " << - ( breadth->start ) << " " << - ( ( c->cost / breadth->start ) ) << endl; - } - - this->id->comm += out.str(); -} - -void ParseData::reportAnalysisResult( FsmRes &res ) -{ - if ( res.type == FsmRes::TypeTooManyStates ) - analysisResult( 1, 0, "too-many-states" ); - - else if ( res.type == FsmRes::TypeCondCostTooHigh ) - analysisResult( 20, res.id, "cond-cost" ); - - else if ( res.type == FsmRes::TypePriorInteraction ) - analysisResult( 60, res.id, "prior-interaction" ); -} - - -/* Make the graph from a graph dict node. Does minimization and state sorting. 
*/ -FsmRes ParseData::makeInstance( GraphDictEl *gdNode ) -{ - if ( id->printStatistics ) - id->stats() << "compiling\t" << sectionName << endl; - - if ( id->stateLimit > 0 ) - fsmCtx->stateLimit = id->stateLimit; - - /* Build the graph from a walk of the parse tree. */ - FsmRes graph = gdNode->value->walk( this ); - - if ( id->stateLimit > 0 ) - fsmCtx->stateLimit = FsmCtx::STATE_UNLIMITED; - - /* Perform the breadth computation. This does not affect the FSM result. We - * compute and print and move on. Higher up we catch the checkBreadth flag - * and stop output. */ - if ( graph.success() && id->checkBreadth ) { - BreadthResult *breadth = checkBreadth( graph.fsm ); - reportBreadthResults( breadth ); - } - - if ( id->condsCheckDepth >= 0 ) { - /* Use this to expand generalized repetition to past the nfa union - * choice point. */ - fsmCtx->condsCheckDepth = id->condsCheckDepth; - graph = FsmAp::condCostSearch( graph.fsm ); - } - - if ( !graph.success() ) { - reportAnalysisResult( graph ); - return graph; - } - - fsmCtx->finalizeInstance( graph.fsm ); - - return graph; -} - -void ParseData::printNameTree( ostream &out ) -{ - /* Print the name instance map. */ - for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ ) - printNameInst( out, *name, 0 ); - - out << "name index:" << endl; - /* Show that the name index is correct. */ - for ( int ni = 0; ni < nextNameId; ni++ ) { - out << ni << ": "; - std::string name = fsmCtx->nameIndex[ni]->name; - out << ( !name.empty() ? name : "" ) << endl; - } -} - -FsmRes ParseData::makeSpecific( GraphDictEl *gdNode ) -{ - /* Build the name tree and supporting data structures. */ - makeNameTree( gdNode ); - - /* Resove name references from gdNode. */ - initNameWalk(); - gdNode->value->resolveNameRefs( this ); - - /* Do not resolve action references. Since we are not building the entire - * graph there's a good chance that many name references will fail. 
This - * is okay since generating part of the graph is usually only done when - * inspecting the compiled machine. */ - - /* Same story for extern entry point references. */ - - /* Flag this case so that the XML code generator is aware that we haven't - * looked up name references in actions. It can then avoid segfaulting. */ - fsmCtx->generatingSectionSubset = true; - - /* Just building the specified graph. */ - initNameWalk(); - FsmRes mainGraph = makeInstance( gdNode ); - - return mainGraph; -} - -FsmRes ParseData::makeAll() -{ - /* Build the name tree and supporting data structures. */ - makeNameTree( 0 ); - - /* Resove name references in the tree. */ - initNameWalk(); - for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) - glel->value->resolveNameRefs( this ); - - /* Resolve action code name references. */ - resolveActionNameRefs(); - - /* Force name references to the top level instantiations. */ - for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ ) - (*inst)->numRefs += 1; - - FsmAp *mainGraph = 0; - FsmAp **graphs = new FsmAp*[instanceList.length()]; - int numOthers = 0; - - /* Make all the instantiations, we know that main exists in this list. */ - initNameWalk(); - for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) { - FsmRes res = makeInstance( glel ); - if ( !res.success() ) { - for ( int i = 0; i < numOthers; i++ ) - delete graphs[i]; - delete[] graphs; - return res; - } - - /* Main graph is always instantiated. */ - if ( glel->key == MAIN_MACHINE ) - mainGraph = res.fsm; - else - graphs[numOthers++] = res.fsm; - } - - if ( mainGraph == 0 ) - mainGraph = graphs[--numOthers]; - - if ( numOthers > 0 ) { - /* Add all the other graphs into main. */ - mainGraph->globOp( graphs, numOthers ); - } - - delete[] graphs; - return FsmRes( FsmRes::Fsm(), mainGraph ); -} - - -void ParseData::makeExportsNameTree() -{ - /* Make a name tree for the exports. */ - initExportsNameWalk(); - - /* First make the name tree. 
*/ - for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) { - if ( gdel->value->isExport ) { - /* Recurse on the instance. */ - gdel->value->makeNameTree( gdel->loc, this ); - } - } -} - -void ParseData::makeExports() -{ - makeExportsNameTree(); - - /* Resove name references in the tree. */ - initExportsNameWalk(); - for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) { - if ( gdel->value->isExport ) - gdel->value->resolveNameRefs( this ); - } - - /* Make all the instantiations, we know that main exists in this list. */ - initExportsNameWalk(); - for ( GraphDict::Iter gdel = graphDict; gdel.lte(); gdel++ ) { - /* Check if this var def is an export. */ - if ( gdel->value->isExport ) { - /* Build the graph from a walk of the parse tree. */ - FsmRes graph = gdel->value->walk( this ); - - /* Build the graph from a walk of the parse tree. */ - if ( !graph.fsm->checkSingleCharMachine() ) { - id->error(gdel->loc) << "bad export machine, must define " - "a single character" << endl; - } - else { - /* Safe to extract the key and declare the export. */ - Key exportKey = graph.fsm->startState->outList.head->lowKey; - fsmCtx->exportList.append( new Export( gdel->value->name, exportKey ) ); - } - } - } -} - -FsmRes ParseData::prepareMachineGen( GraphDictEl *graphDictEl, const HostLang *hostLang ) -{ - initKeyOps( hostLang ); - makeRootNames(); - initLongestMatchData(); - - /* Make the graph, do minimization. */ - if ( graphDictEl == 0 ) { - FsmRes res = makeAll(); - if ( !res.success() ) - return res; - sectionGraph = res.fsm; - } - else { - FsmRes res = makeSpecific( graphDictEl ); - if ( !res.success() ) - return res; - sectionGraph = res.fsm; - } - - /* If any errors have occured in the input file then don't write anything. */ - if ( id->errorCount > 0 ) - return FsmRes( FsmRes::InternalError() ); - - fsmCtx->analyzeGraph( sectionGraph ); - - /* Depends on the graph analysis. 
*/ - longestMatchInitTweaks( sectionGraph ); - - fsmCtx->prepareReduction( sectionGraph ); - - return FsmRes( FsmRes::Fsm(), sectionGraph ); -} - -void ParseData::generateReduced( const char *inputFileName, CodeStyle codeStyle, - std::ostream &out, const HostLang *hostLang ) -{ - Reducer *red = new Reducer( this->id, fsmCtx, sectionGraph, sectionName, machineId ); - red->make( hostLang, alphType ); - - CodeGenArgs args( this->id, red, alphType, machineId, inputFileName, sectionName, out, codeStyle ); - - args.lineDirectives = !id->noLineDirectives; - args.forceVar = id->forceVar; - args.loopLabels = hostLang->loopLabels; - - /* Write out with it. */ - cgd = (*hostLang->makeCodeGen)( hostLang, args ); - - /* Code generation anlysis step. */ - cgd->genAnalysis(); -} - -#if 0 -void ParseData::generateXML( ostream &out ) -{ - /* Make the generator. */ - XMLCodeGen codeGen( sectionName, machineId, id, this, sectionGraph, out ); - - /* Write out with it. */ - codeGen.writeXML(); -} -#endif - -void ParseData::clear() -{ - cgd->clear(); - - delete sectionGraph; - sectionGraph = 0; - - graphDict.empty(); - - /* Delete all the nodes in the action list. Will cause all the - * string data that represents the actions to be deallocated. 
*/ - fsmCtx->actionList.empty(); -} diff --git a/ragel/parsetree.cc b/ragel/parsetree.cc deleted file mode 100644 index 38646cf6..00000000 --- a/ragel/parsetree.cc +++ /dev/null @@ -1,2199 +0,0 @@ -/* - * Copyright 2001-2018 Adrian Thurston - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include - -/* Parsing. */ -#include "ragel.h" -#include "parsetree.h" -#include "parsedata.h" - -using namespace std; -ostream &operator<<( ostream &out, const NameRef &nameRef ); -ostream &operator<<( ostream &out, const NameInst &nameInst ); - -/* Read string literal (and regex) options and return the true end. 
*/ -const char *checkLitOptions( InputData *id, const InputLoc &loc, - const char *data, int length, bool &caseInsensitive ) -{ - const char *end = data + length - 1; - while ( *end != '\'' && *end != '\"' && *end != '/' ) { - if ( *end == 'i' ) - caseInsensitive = true; - else { - id->error( loc ) << "literal string '" << *end << - "' option not supported" << endl; - } - end -= 1; - } - return end; -} - -/* Convert the literal string which comes in from the scanner into an array of - * characters with escapes and options interpreted. Also null terminates the - * string. Though this null termination should not be relied on for - * interpreting literals in the parser because the string may contain \0 */ -char *prepareLitString( InputData *id, const InputLoc &loc, const char *data, long length, - long &resLen, bool &caseInsensitive ) -{ - char *resData = new char[length+1]; - caseInsensitive = false; - - const char *src = data + 1; - const char *end = checkLitOptions( id, loc, data, length, caseInsensitive ); - - char *dest = resData; - long dlen = 0; - while ( src != end ) { - if ( *src == '\\' ) { - switch ( src[1] ) { - case '0': dest[dlen++] = '\0'; break; - case 'a': dest[dlen++] = '\a'; break; - case 'b': dest[dlen++] = '\b'; break; - case 't': dest[dlen++] = '\t'; break; - case 'n': dest[dlen++] = '\n'; break; - case 'v': dest[dlen++] = '\v'; break; - case 'f': dest[dlen++] = '\f'; break; - case 'r': dest[dlen++] = '\r'; break; - case '\n': break; - default: dest[dlen++] = src[1]; break; - } - src += 2; - } - else { - dest[dlen++] = *src++; - } - } - - resLen = dlen; - resData[resLen] = 0; - return resData; -} - -Key *prepareHexString( ParseData *pd, const InputLoc &loc, - const char *data, long length, long &resLen ) -{ - Key *dest = new Key[( length - 2 ) >> 1]; - const char *src = data; - const char *end = data + length; - long dlen = 0; - char s[3]; - - /* Scan forward over 0x. 
*/ - src += 2; - - s[2] = 0; - while ( src < end ) { - s[0] = src[0]; - s[1] = src[1]; - - dest[dlen++] = makeFsmKeyHex( s, loc, pd ); - - /* Scan forward over the hex chars, then any whitespace or . characters. */ - src += 2; - while ( *src == ' ' || *src == '\t' || *src == '\n' || *src == '.' ) - src += 1; - - /* Scan forward over 0x. */ - src += 2; - } - - resLen = dlen; - return dest; -} - -FsmRes VarDef::walk( ParseData *pd ) -{ - /* We enter into a new name scope. */ - NameFrame nameFrame = pd->enterNameScope( true, 1 ); - - /* Recurse on the expression. */ - FsmRes rtnVal = machineDef->walk( pd ); - if ( !rtnVal.success() ) - return rtnVal; - - /* Do the tranfer of local error actions. */ - LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name ); - if ( localErrDictEl != 0 ) { - for ( StateList::Iter state = rtnVal.fsm->stateList; state.lte(); state++ ) - rtnVal.fsm->transferErrorActions( state, localErrDictEl->value ); - } - - /* If the expression below is a join operation with multiple expressions - * then it just had epsilon transisions resolved. If it is a join - * with only a single expression then run the epsilon op now. */ - if ( machineDef->type == MachineDef::JoinType && - machineDef->join->exprList.length() == 1 ) - { - rtnVal = FsmAp::epsilonOp( rtnVal.fsm ); - if ( !rtnVal.success() ) - return rtnVal; - } - - /* We can now unset entry points that are not longer used. */ - pd->unsetObsoleteEntries( rtnVal.fsm ); - - /* If the name of the variable is referenced then add the entry point to - * the graph. */ - if ( pd->curNameInst->numRefs > 0 ) - rtnVal.fsm->setEntry( pd->curNameInst->id, rtnVal.fsm->startState ); - - /* Pop the name scope. */ - pd->popNameScope( nameFrame ); - return rtnVal; -} - -void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd ) -{ - /* The variable definition enters a new scope. 
*/ - NameInst *prevNameInst = pd->curNameInst; - pd->curNameInst = pd->addNameInst( loc, name, false ); - - if ( machineDef->type == MachineDef::LongestMatchType ) - pd->curNameInst->isLongestMatch = true; - - /* Recurse. */ - machineDef->makeNameTree( pd ); - - /* The name scope ends, pop the name instantiation. */ - pd->curNameInst = prevNameInst; -} - -void VarDef::resolveNameRefs( ParseData *pd ) -{ - /* Entering into a new scope. */ - NameFrame nameFrame = pd->enterNameScope( true, 1 ); - - /* Recurse. */ - machineDef->resolveNameRefs( pd ); - - /* The name scope ends, pop the name instantiation. */ - pd->popNameScope( nameFrame ); -} - -VarDef::~VarDef() -{ - delete machineDef; -} - -InputLoc LongestMatchPart::getLoc() -{ - return action != 0 ? action->loc : semiLoc; -} - -/* - * If there are any LMs then all of the following entry points must reset - * tokstart: - * - * 1. fentry(StateRef) - * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef) - * 3. targt of any transition that has an fcall (the return loc). - * 4. start state of all longest match routines. - */ - -Action *LongestMatch::newLmAction( ParseData *pd, const InputLoc &loc, - const char *name, InlineList *inlineList ) -{ - Action *action = new Action( loc, name, inlineList, pd->fsmCtx->nextCondId++ ); - action->embedRoots.append( pd->curNameInst ); - pd->fsmCtx->actionList.append( action ); - action->isLmAction = true; - return action; -} - -void LongestMatch::makeActions( ParseData *pd ) -{ - /* Make actions that set the action id. */ - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. 
*/ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - inlineList->head->children = new InlineList; - inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmSetActId ) ); - char *actName = new char[50]; - sprintf( actName, "store%i", lmi->longestMatchId ); - lmi->setActId = newLmAction( pd, lmi->getLoc(), actName, inlineList ); - } - - /* Make actions that execute the user action and restart on the last - * character. */ - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - inlineList->head->children = new InlineList; - inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmOnLast ) ); - char *actName = new char[50]; - sprintf( actName, "last%i", lmi->longestMatchId ); - lmi->actOnLast = newLmAction( pd, lmi->getLoc(), actName, inlineList ); - } - - /* Make actions that execute the user action and restart on the next - * character. These actions will set tokend themselves (it is the current - * char). */ - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. 
*/ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - inlineList->head->children = new InlineList; - inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmOnNext ) ); - char *actName = new char[50]; - sprintf( actName, "next%i", lmi->longestMatchId ); - lmi->actOnNext = newLmAction( pd, lmi->getLoc(), actName, inlineList ); - } - - /* Make actions that execute the user action and restart at tokend. These - * actions execute some time after matching the last char. */ - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - inlineList->head->children = new InlineList; - inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmOnLagBehind ) ); - char *actName = new char[50]; - sprintf( actName, "lag%i", lmi->longestMatchId ); - lmi->actLagBehind = newLmAction( pd, lmi->getLoc(), actName, inlineList ); - } - - /* - * NFA actions - * - * Actions that execute the user action and restart on the next character. - * These actions will set tokend themselves (it is the current char). They - * also reset the nfa machinery used to choose between tokens. - */ - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. 
*/ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - inlineList->head->children = new InlineList; - inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmNfaOnLast ) ); - char *actName = new char[50]; - sprintf( actName, "nlast%i", lmi->longestMatchId ); - lmi->actNfaOnLast = newLmAction( pd, lmi->getLoc(), actName, inlineList ); - } - - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - inlineList->head->children = new InlineList; - inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmNfaOnNext ) ); - char *actName = new char[50]; - sprintf( actName, "nnext%i", lmi->longestMatchId ); - lmi->actNfaOnNext = newLmAction( pd, lmi->getLoc(), actName, inlineList ); - } - - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - inlineList->head->children = new InlineList; - inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmNfaOnEof ) ); - char *actName = new char[50]; - sprintf( actName, "neof%i", lmi->longestMatchId ); - lmi->actNfaOnEof = newLmAction( pd, lmi->getLoc(), actName, inlineList ); - } - - InputLoc loc; - loc.line = 1; - loc.col = 1; - loc.fileName = "NONE"; - - /* Create the error action. 
*/ - InlineList *il6 = new InlineList; - il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) ); - lmActSelect = newLmAction( pd, loc, "switch", il6 ); -} - -void LongestMatch::findName( ParseData *pd ) -{ - NameInst *nameInst = pd->curNameInst; - while ( nameInst->name.empty() ) { - nameInst = nameInst->parent; - /* Since every machine must must have a name, we should always find a - * name for the longest match. */ - assert( nameInst != 0 ); - } - name = nameInst->name; -} - -void LongestMatch::makeNameTree( ParseData *pd ) -{ - /* Create an anonymous scope for the longest match. Will be used for - * restarting machine after matching a token. */ - NameInst *prevNameInst = pd->curNameInst; - pd->curNameInst = pd->addNameInst( loc, std::string(), false ); - - /* Recurse into all parts of the longest match operator. */ - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) - lmi->join->makeNameTree( pd ); - - /* Traverse the name tree upwards to find a name for this lm. */ - findName( pd ); - - /* Also make the longest match's actions at this point. */ - makeActions( pd ); - - /* The name scope ends, pop the name instantiation. */ - pd->curNameInst = prevNameInst; -} - -void LongestMatch::resolveNameRefs( ParseData *pd ) -{ - /* The longest match gets its own name scope. */ - NameFrame nameFrame = pd->enterNameScope( true, 1 ); - - /* Take an action reference for each longest match item and recurse. */ - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { - /* Record the reference if the item has an action. */ - if ( lmi->action != 0 ) - lmi->action->embedRoots.append( pd->localNameScope ); - - /* Recurse down the join. */ - lmi->join->resolveNameRefs( pd ); - } - - /* The name scope ends, pop the name instantiation. 
*/ - pd->popNameScope( nameFrame ); -} - -void LongestMatch::restart( FsmAp *graph, TransAp *trans ) -{ - StateAp *fromState = trans->tdap()->fromState; - graph->detachTrans( fromState, trans->tdap()->toState, trans->tdap() ); - graph->attachTrans( fromState, graph->startState, trans->tdap() ); -} - -void LongestMatch::restart( FsmAp *graph, CondAp *cti ) -{ - StateAp *fromState = cti->fromState; - graph->detachTrans( fromState, cti->toState, cti ); - graph->attachTrans( fromState, graph->startState, cti ); -} - -void LongestMatch::transferScannerLeavingActions( FsmAp *graph ) -{ - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - if ( st->outActionTable.length() > 0 ) - graph->setErrorActions( st, st->outActionTable ); - } -} - -FsmRes LongestMatch::walkClassic( ParseData *pd ) -{ - /* The longest match has it's own name scope. */ - NameFrame nameFrame = pd->enterNameScope( true, 1 ); - - /* Make each part of the longest match. */ - FsmAp **parts = new FsmAp*[longestMatchList->length()]; - LmPartList::Iter lmi = *longestMatchList; - for ( int i = 0; lmi.lte(); lmi++, i++ ) { - /* Create the machine and embed the setting of the longest match id. */ - FsmRes res = lmi->join->walk( pd ); - if ( !res.success() ) - return res; - - parts[i] = res.fsm; - parts[i]->longMatchAction( pd->fsmCtx->curActionOrd++, lmi ); - } - - /* Before we union the patterns we need to deal with leaving actions. They - * are transfered to error transitions out of the final states (like local - * error actions) and to eof actions. In the scanner we need to forbid - * on_last for any final state that has an leaving action. */ - for ( int i = 0; i < longestMatchList->length(); i++ ) - transferScannerLeavingActions( parts[i] ); - - /* Union machines one and up with machine zero. The grammar dictates that - * there will always be at least one part. 
*/ - FsmRes res( FsmRes::Fsm(), parts[0] ); - for ( int i = 1; i < longestMatchList->length(); i++ ) { - res = FsmAp::unionOp( res.fsm, parts[i] ); - if ( !res.success() ) - return res; - } - - runLongestMatch( pd, res.fsm ); - - /* Pop the name scope. */ - pd->popNameScope( nameFrame ); - - delete[] parts; - return res; -} - - -FsmRes LongestMatch::walk( ParseData *pd ) -{ - if ( nfaConstruction ) - return walkNfa( pd ); - else - return walkClassic( pd ); -} - -NfaUnion::~NfaUnion() -{ - for ( TermVect::Iter term = terms; term.lte(); term++ ) - delete *term; - if ( roundsList != 0 ) - delete roundsList; -} - -FsmRes NfaUnion::walk( ParseData *pd ) -{ - if ( pd->id->printStatistics ) - pd->id->stats() << "nfa union terms\t" << terms.length() << endl; - - /* Compute the individual expressions. */ - long numMachines = 0; - FsmAp **machines = new FsmAp*[terms.length()]; - for ( TermVect::Iter term = terms; term.lte(); term++ ) { - FsmRes res = (*term)->walk( pd ); - if ( !res.success() ) { - /* Delete previos. */ - for ( int m = 0; m < numMachines; ++m) - delete machines[m]; - delete[] machines; - return res; - } - - machines[numMachines++] = res.fsm; - } - - std::ostream &stats = pd->id->stats(); - bool printStatistics = pd->id->printStatistics; - - return FsmAp::nfaUnion( *roundsList, machines, numMachines, stats, printStatistics ); -} - -void NfaUnion::makeNameTree( ParseData *pd ) -{ - for ( TermVect::Iter term = terms; term.lte(); term++ ) - (*term)->makeNameTree( pd ); -} - -void NfaUnion::resolveNameRefs( ParseData *pd ) -{ - for ( TermVect::Iter term = terms; term.lte(); term++ ) - (*term)->resolveNameRefs( pd ); -} - -FsmRes MachineDef::walk( ParseData *pd ) -{ - switch ( type ) { - case JoinType: - return join->walk( pd ); - case LongestMatchType: - return longestMatch->walk( pd ); - case LengthDefType: - /* Towards lengths. 
*/ - return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( pd->fsmCtx ) ); - case NfaUnionType: - return nfaUnion->walk( pd ); - } - return FsmRes( FsmRes::InternalError() ); -} - -void MachineDef::makeNameTree( ParseData *pd ) -{ - switch ( type ) { - case JoinType: - join->makeNameTree( pd ); - break; - case LongestMatchType: - longestMatch->makeNameTree( pd ); - break; - case LengthDefType: - break; - case NfaUnionType: - nfaUnion->makeNameTree( pd ); - break; - } -} - -void MachineDef::resolveNameRefs( ParseData *pd ) -{ - switch ( type ) { - case JoinType: - join->resolveNameRefs( pd ); - break; - case LongestMatchType: - longestMatch->resolveNameRefs( pd ); - break; - case LengthDefType: - break; - case NfaUnionType: - nfaUnion->resolveNameRefs( pd ); - break; - } -} - -MachineDef::~MachineDef() -{ - if ( join != 0 ) - delete join; - if ( longestMatch != 0 ) - delete longestMatch; - if ( lengthDef != 0 ) - delete lengthDef; - if ( nfaUnion != 0 ) - delete nfaUnion; -} - -/* Construct with a location and the first expression. */ -Join::Join( const InputLoc &loc, Expression *expr ) -: - loc(loc) -{ - exprList.append( expr ); -} - -/* Construct with a location and the first expression. */ -Join::Join( Expression *expr ) -{ - exprList.append( expr ); -} - -/* Walk an expression node. */ -FsmRes Join::walk( ParseData *pd ) -{ - if ( exprList.length() == 1 ) - return exprList.head->walk( pd ); - - return walkJoin( pd ); -} - -/* There is a list of expressions to join. */ -FsmRes Join::walkJoin( ParseData *pd ) -{ - /* We enter into a new name scope. */ - NameFrame nameFrame = pd->enterNameScope( true, 1 ); - - /* Evaluate the machines. */ - FsmAp **fsms = new FsmAp*[exprList.length()]; - ExprList::Iter expr = exprList; - for ( int e = 0; e < exprList.length(); e++, expr++ ) { - FsmRes res = expr->walk( pd ); - if ( !res.success() ) - return res; - fsms[e] = res.fsm; - } - - /* Get the start and final names. Final is - * guaranteed to exist, start is not. 
*/ - NameInst *startName = pd->curNameInst->start; - NameInst *finalName = pd->curNameInst->final; - - int startId = -1; - if ( startName != 0 ) { - /* Take note that there was an implicit link to the start machine. */ - pd->localNameScope->referencedNames.append( startName ); - startId = startName->id; - } - - /* A final id of -1 indicates there is no epsilon that references the - * final state, therefor do not create one or set an entry point to it. */ - int finalId = -1; - if ( finalName->numRefs > 0 ) - finalId = finalName->id; - - /* Join machines 1 and up onto machine 0. */ - FsmRes res = FsmAp::joinOp( fsms[0], startId, finalId, fsms+1, exprList.length()-1 ); - if ( !res.success() ) - return res; - - /* We can now unset entry points that are not longer used. */ - pd->unsetObsoleteEntries( res.fsm ); - - /* Pop the name scope. */ - pd->popNameScope( nameFrame ); - - delete[] fsms; - return res; -} - -void Join::makeNameTree( ParseData *pd ) -{ - if ( exprList.length() > 1 ) { - /* Create the new anonymous scope. */ - NameInst *prevNameInst = pd->curNameInst; - pd->curNameInst = pd->addNameInst( loc, std::string(), false ); - - /* Join scopes need an implicit "final" target. */ - pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final", - pd->nextNameId++, false ); - - /* Recurse into all expressions in the list. */ - for ( ExprList::Iter expr = exprList; expr.lte(); expr++ ) - expr->makeNameTree( pd ); - - /* The name scope ends, pop the name instantiation. */ - pd->curNameInst = prevNameInst; - } - else { - /* Recurse into the single expression. */ - exprList.head->makeNameTree( pd ); - } -} - - -void Join::resolveNameRefs( ParseData *pd ) -{ - /* Branch on whether or not there is to be a join. */ - if ( exprList.length() > 1 ) { - /* The variable definition enters a new scope. */ - NameFrame nameFrame = pd->enterNameScope( true, 1 ); - - /* The join scope must contain a start label. 
*/ - NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true ); - if ( resolved.length() > 0 ) { - /* Take the first. */ - pd->curNameInst->start = resolved[0]; - if ( resolved.length() > 1 ) { - /* Complain about the multiple references. */ - pd->id->error(loc) << "join operation has multiple start labels" << endl; - pd->errorStateLabels( resolved ); - } - } - - /* Make sure there is a start label. */ - if ( pd->curNameInst->start != 0 ) { - /* There is an implicit reference to start name. */ - pd->curNameInst->start->numRefs += 1; - } - else { - /* No start label. */ - pd->id->error(loc) << "join operation has no start label" << endl; - } - - /* Recurse into all expressions in the list. */ - for ( ExprList::Iter expr = exprList; expr.lte(); expr++ ) - expr->resolveNameRefs( pd ); - - /* The name scope ends, pop the name instantiation. */ - pd->popNameScope( nameFrame ); - } - else { - /* Recurse into the single expression. */ - exprList.head->resolveNameRefs( pd ); - } -} - -/* Clean up after an expression node. */ -Expression::~Expression() -{ - if ( expression ) - delete expression; - if ( term ) - delete term; -} - -/* Evaluate a single expression node. */ -FsmRes Expression::walk( ParseData *pd, bool lastInSeq ) -{ - switch ( type ) { - case OrType: { - /* Evaluate the expression. */ - FsmRes exprFsm = expression->walk( pd, false ); - if ( !exprFsm.success() ) - return exprFsm; - - /* Evaluate the term. */ - FsmRes rhs = term->walk( pd ); - if ( !rhs.success() ) - return rhs; - - /* Perform union. */ - FsmRes res = FsmAp::unionOp( exprFsm.fsm, rhs.fsm, lastInSeq ); - if ( !res.success() ) - return res; - - return res; - } - case IntersectType: { - /* Evaluate the expression. */ - FsmRes exprFsm = expression->walk( pd ); - if ( !exprFsm.success() ) - return exprFsm; - - /* Evaluate the term. */ - FsmRes rhs = term->walk( pd ); - if ( !rhs.success() ) - return rhs; - - /* Perform intersection. 
*/ - FsmRes res = FsmAp::intersectOp( exprFsm.fsm, rhs.fsm, lastInSeq ); - if ( !res.success() ) - return res; - - return res; - } - case SubtractType: { - /* Evaluate the expression. */ - FsmRes exprFsm = expression->walk( pd ); - if ( !exprFsm.success() ) - return exprFsm; - - /* Evaluate the term. */ - FsmRes rhs = term->walk( pd ); - if ( !rhs.success() ) - return rhs; - - /* Perform subtraction. */ - FsmRes res = FsmAp::subtractOp( exprFsm.fsm, rhs.fsm, lastInSeq ); - if ( !res.success() ) - return res; - - return res; - } - case StrongSubtractType: { - /* Evaluate the expression. */ - FsmRes exprFsm = expression->walk( pd ); - if ( !exprFsm.success() ) - return exprFsm; - - FsmAp *leadAnyStar = FsmAp::dotStarFsm( pd->fsmCtx ); - FsmAp *trailAnyStar = FsmAp::dotStarFsm( pd->fsmCtx ); - - /* Evaluate the term and pad it with any* machines. */ - FsmRes termFsm = term->walk( pd ); - if ( !termFsm.success() ) - return termFsm; - - FsmRes res1 = FsmAp::concatOp( leadAnyStar, termFsm.fsm ); - if ( !res1.success() ) - return res1; - - FsmRes res2 = FsmAp::concatOp( res1.fsm, trailAnyStar ); - if ( !res2.success() ) - return res2; - - /* Perform subtraction. */ - FsmRes res3 = FsmAp::subtractOp( exprFsm.fsm, res2.fsm, lastInSeq ); - if ( !res3.success() ) - return res3; - - return res3; - } - case TermType: { - /* Return result of the term. */ - return term->walk( pd ); - } - case BuiltinType: { - /* Construct the builtin. 
*/ - return FsmRes( FsmRes::Fsm(), makeBuiltin( builtin, pd ) ); - } - } - - return FsmRes( FsmRes::InternalError() ); -} - -void Expression::makeNameTree( ParseData *pd ) -{ - switch ( type ) { - case OrType: - case IntersectType: - case SubtractType: - case StrongSubtractType: - expression->makeNameTree( pd ); - term->makeNameTree( pd ); - break; - case TermType: - term->makeNameTree( pd ); - break; - case BuiltinType: - break; - } -} - -void Expression::resolveNameRefs( ParseData *pd ) -{ - switch ( type ) { - case OrType: - case IntersectType: - case SubtractType: - case StrongSubtractType: - expression->resolveNameRefs( pd ); - term->resolveNameRefs( pd ); - break; - case TermType: - term->resolveNameRefs( pd ); - break; - case BuiltinType: - break; - } -} - -/* Clean up after a term node. */ -Term::~Term() -{ - if ( term ) - delete term; - if ( factorWithAug ) - delete factorWithAug; -} - -/* Evaluate a term node. */ -FsmRes Term::walk( ParseData *pd, bool lastInSeq ) -{ - switch ( type ) { - case ConcatType: { - /* Evaluate the Term. */ - FsmRes termFsm = term->walk( pd, false ); - if ( !termFsm.success() ) - return termFsm; - - /* Evaluate the FactorWithRep. */ - FsmRes rhs = factorWithAug->walk( pd ); - if ( !rhs.success() ) { - delete termFsm.fsm; - return rhs; - } - - /* Perform concatenation. */ - FsmRes res = FsmAp::concatOp( termFsm.fsm, rhs.fsm, lastInSeq ); - if ( !res.success() ) - return res; - - return res; - } - case RightStartType: { - /* Evaluate the Term. */ - FsmRes termFsm = term->walk( pd ); - if ( !termFsm.success() ) - return termFsm; - - /* Evaluate the FactorWithRep. */ - FsmRes rhs = factorWithAug->walk( pd ); - if ( !rhs.success() ) { - delete termFsm.fsm; - return rhs; - } - - /* Perform concatenation. */ - FsmRes res = FsmAp::rightStartConcatOp( termFsm.fsm, rhs.fsm, lastInSeq ); - if ( !res.success() ) - return res; - - return res; - } - case RightFinishType: { - /* Evaluate the Term. 
*/ - FsmRes termFsm = term->walk( pd ); - if ( !termFsm.success() ) - return termFsm; - - /* Evaluate the FactorWithRep. */ - FsmRes rhs = factorWithAug->walk( pd ); - if ( !rhs.success() ) { - delete termFsm.fsm; - return rhs; - } - - /* Set up the priority descriptors. The left machine gets the - * lower priority where as the finishing transitions to the right - * get the higher priority. */ - priorDescs[0].key = pd->fsmCtx->nextPriorKey++; - priorDescs[0].priority = 0; - termFsm.fsm->allTransPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[0] ); - - /* The finishing transitions of the right machine get the higher - * priority. Use the same unique key. */ - priorDescs[1].key = priorDescs[0].key; - priorDescs[1].priority = 1; - rhs.fsm->finishFsmPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[1] ); - - /* If the right machine's start state is final we need to guard - * against the left machine persisting by moving through the empty - * string. */ - if ( rhs.fsm->startState->isFinState() ) { - rhs.fsm->startState->outPriorTable.setPrior( - pd->fsmCtx->curPriorOrd++, &priorDescs[1] ); - } - - /* Perform concatenation. */ - FsmRes res = FsmAp::concatOp( termFsm.fsm, rhs.fsm, lastInSeq ); - if ( !res.success() ) - return res; - - return res; - } - case LeftType: { - /* Evaluate the Term. */ - FsmRes termFsm = term->walk( pd ); - if ( !termFsm.success() ) - return termFsm; - - /* Evaluate the FactorWithRep. */ - FsmRes rhs = factorWithAug->walk( pd ); - if ( !rhs.success() ) { - delete termFsm.fsm; - return rhs; - } - - /* Set up the priority descriptors. The left machine gets the - * higher priority. */ - priorDescs[0].key = pd->fsmCtx->nextPriorKey++; - priorDescs[0].priority = 1; - termFsm.fsm->allTransPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[0] ); - - /* The right machine gets the lower priority. We cannot use - * allTransPrior here in case the start state of the right machine - * is final. 
It would allow the right machine thread to run along - * with the left if just passing through the start state. Using - * startFsmPrior prevents this. */ - priorDescs[1].key = priorDescs[0].key; - priorDescs[1].priority = 0; - rhs.fsm->startFsmPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[1] ); - - /* Perform concatenation. */ - FsmRes res = FsmAp::concatOp( termFsm.fsm, rhs.fsm, lastInSeq ); - if ( !res.success() ) - return res; - - return res; - } - case FactorWithAugType: { - return factorWithAug->walk( pd ); - } - } - return FsmRes( FsmRes::InternalError() ); -} - -void Term::makeNameTree( ParseData *pd ) -{ - switch ( type ) { - case ConcatType: - case RightStartType: - case RightFinishType: - case LeftType: - term->makeNameTree( pd ); - factorWithAug->makeNameTree( pd ); - break; - case FactorWithAugType: - factorWithAug->makeNameTree( pd ); - break; - } -} - -void Term::resolveNameRefs( ParseData *pd ) -{ - switch ( type ) { - case ConcatType: - case RightStartType: - case RightFinishType: - case LeftType: - term->resolveNameRefs( pd ); - factorWithAug->resolveNameRefs( pd ); - break; - case FactorWithAugType: - factorWithAug->resolveNameRefs( pd ); - break; - } -} - -/* Clean up after a factor with augmentation node. */ -FactorWithAug::~FactorWithAug() -{ - delete factorWithRep; - - /* Walk the vector of parser actions, deleting function names. */ - - /* Clean up priority descriptors. */ - if ( priorDescs != 0 ) - delete[] priorDescs; -} - -void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd ) -{ - /* Assign actions. */ - for ( int i = 0; i < actions.length(); i++ ) { - switch ( actions[i].type ) { - /* Transition actions. 
*/ - case at_start: - graph->startFsmAction( actionOrd[i], actions[i].action ); - break; - case at_all: - graph->allTransAction( actionOrd[i], actions[i].action ); - break; - case at_finish: - graph->finishFsmAction( actionOrd[i], actions[i].action ); - break; - case at_leave: - graph->leaveFsmAction( actionOrd[i], actions[i].action ); - break; - - /* Global error actions. */ - case at_start_gbl_error: - graph->startErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - case at_all_gbl_error: - graph->allErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - case at_final_gbl_error: - graph->finalErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - case at_not_start_gbl_error: - graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - case at_not_final_gbl_error: - graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - case at_middle_gbl_error: - graph->middleErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - - /* Local error actions. */ - case at_start_local_error: - graph->startErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - case at_all_local_error: - graph->allErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - case at_final_local_error: - graph->finalErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - case at_not_start_local_error: - graph->notStartErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - case at_not_final_local_error: - graph->notFinalErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - case at_middle_local_error: - graph->middleErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - - /* EOF actions. 
*/ - case at_start_eof: - graph->startEOFAction( actionOrd[i], actions[i].action ); - break; - case at_all_eof: - graph->allEOFAction( actionOrd[i], actions[i].action ); - break; - case at_final_eof: - graph->finalEOFAction( actionOrd[i], actions[i].action ); - break; - case at_not_start_eof: - graph->notStartEOFAction( actionOrd[i], actions[i].action ); - break; - case at_not_final_eof: - graph->notFinalEOFAction( actionOrd[i], actions[i].action ); - break; - case at_middle_eof: - graph->middleEOFAction( actionOrd[i], actions[i].action ); - break; - - /* To State Actions. */ - case at_start_to_state: - graph->startToStateAction( actionOrd[i], actions[i].action ); - break; - case at_all_to_state: - graph->allToStateAction( actionOrd[i], actions[i].action ); - break; - case at_final_to_state: - graph->finalToStateAction( actionOrd[i], actions[i].action ); - break; - case at_not_start_to_state: - graph->notStartToStateAction( actionOrd[i], actions[i].action ); - break; - case at_not_final_to_state: - graph->notFinalToStateAction( actionOrd[i], actions[i].action ); - break; - case at_middle_to_state: - graph->middleToStateAction( actionOrd[i], actions[i].action ); - break; - - /* From State Actions. */ - case at_start_from_state: - graph->startFromStateAction( actionOrd[i], actions[i].action ); - break; - case at_all_from_state: - graph->allFromStateAction( actionOrd[i], actions[i].action ); - break; - case at_final_from_state: - graph->finalFromStateAction( actionOrd[i], actions[i].action ); - break; - case at_not_start_from_state: - graph->notStartFromStateAction( actionOrd[i], actions[i].action ); - break; - case at_not_final_from_state: - graph->notFinalFromStateAction( actionOrd[i], actions[i].action ); - break; - case at_middle_from_state: - graph->middleFromStateAction( actionOrd[i], actions[i].action ); - break; - - /* Remaining cases, prevented by the parser. 
*/ - default: - assert( false ); - break; - } - } -} - -void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd ) -{ - /* Assign priorities. */ - for ( int i = 0; i < priorityAugs.length(); i++ ) { - switch ( priorityAugs[i].type ) { - case at_start: - graph->startFsmPrior( priorOrd[i], &priorDescs[i]); - break; - case at_all: - graph->allTransPrior( priorOrd[i], &priorDescs[i] ); - break; - case at_finish: - graph->finishFsmPrior( priorOrd[i], &priorDescs[i] ); - break; - case at_leave: - graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] ); - break; - - default: - /* Parser Prevents this case. */ - break; - } - } -} - -void FactorWithAug::assignConditions( FsmAp *graph ) -{ - for ( int i = 0; i < conditions.length(); i++ ) { - switch ( conditions[i].type ) { - /* Transition actions. */ - case at_start: - graph->startFsmCondition( conditions[i].action, conditions[i].sense ); - break; - case at_all: - graph->allTransCondition( conditions[i].action, conditions[i].sense ); - break; - case at_leave: - graph->leaveFsmCondition( conditions[i].action, conditions[i].sense ); - break; - default: - break; - } - } -} - -/* Evaluate a factor with augmentation node. */ -FsmRes FactorWithAug::walk( ParseData *pd ) -{ - /* Enter into the scopes created for the labels. */ - NameFrame nameFrame = pd->enterNameScope( false, labels.size() ); - - /* Make the array of function orderings. */ - int *actionOrd = 0; - if ( actions.length() > 0 ) - actionOrd = new int[actions.length()]; - - /* First walk the list of actions, assigning order to all starting - * actions. */ - for ( int i = 0; i < actions.length(); i++ ) { - if ( actions[i].type == at_start || - actions[i].type == at_start_gbl_error || - actions[i].type == at_start_local_error || - actions[i].type == at_start_to_state || - actions[i].type == at_start_from_state || - actions[i].type == at_start_eof ) - actionOrd[i] = pd->fsmCtx->curActionOrd++; - } - - /* Evaluate the factor with repetition. 
*/ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) { - delete [] actionOrd; - return factorTree; - } - - FsmAp *rtnVal = factorTree.fsm; - - /* Compute the remaining action orderings. */ - for ( int i = 0; i < actions.length(); i++ ) { - if ( actions[i].type != at_start && - actions[i].type != at_start_gbl_error && - actions[i].type != at_start_local_error && - actions[i].type != at_start_to_state && - actions[i].type != at_start_from_state && - actions[i].type != at_start_eof ) - actionOrd[i] = pd->fsmCtx->curActionOrd++; - } - - /* Embed conditions. */ - assignConditions( rtnVal ); - - /* Embed actions. */ - assignActions( pd, rtnVal , actionOrd ); - - /* Make the array of priority orderings. Orderings are local to this walk - * of the factor with augmentation. */ - int *priorOrd = 0; - if ( priorityAugs.length() > 0 ) - priorOrd = new int[priorityAugs.length()]; - - /* Walk all priorities, assigning the priority ordering. */ - for ( int i = 0; i < priorityAugs.length(); i++ ) - priorOrd[i] = pd->fsmCtx->curPriorOrd++; - - /* If the priority descriptors have not been made, make them now. Make - * priority descriptors for each priority asignment that will be passed to - * the fsm. Used to keep track of the key, value and used bit. */ - if ( priorDescs == 0 && priorityAugs.length() > 0 ) { - priorDescs = new PriorDesc[priorityAugs.length()]; - for ( int i = 0; i < priorityAugs.length(); i++ ) { - /* Init the prior descriptor for the priority setting. */ - priorDescs[i].key = priorityAugs[i].priorKey; - priorDescs[i].priority = priorityAugs[i].priorValue; - priorDescs[i].guarded = false; - priorDescs[i].guardId = 0; - } - } - - /* Assign priorities into the machine. */ - assignPriorities( rtnVal, priorOrd ); - - /* Assign epsilon transitions. */ - for ( int e = 0; e < epsilonLinks.length(); e++ ) { - /* Get the name, which may not exist. If it doesn't then silently - * ignore it because an error has already been reported. 
*/ - NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++]; - if ( epTarg != 0 ) { - /* Make the epsilon transitions. */ - rtnVal->epsilonTrans( epTarg->id ); - - /* Note that we have made a link to the name. */ - pd->localNameScope->referencedNames.append( epTarg ); - } - } - - /* Set entry points for labels. */ - if ( labels.size() > 0 ) { - /* Pop the names. */ - pd->resetNameScope( nameFrame ); - - /* Make labels that are referenced into entry points. */ - for ( size_t i = 0; i < labels.size(); i++ ) { - pd->enterNameScope( false, 1 ); - - /* Will always be found. */ - NameInst *name = pd->curNameInst; - - /* If the name is referenced then set the entry point. */ - if ( name->numRefs > 0 ) - rtnVal->setEntry( name->id, rtnVal->startState ); - - if ( labels[i].cut ) - pd->cuts.append( ParseData::Cut( labels[i].data, name->id ) ); - } - - pd->popNameScope( nameFrame ); - } - - if ( priorOrd != 0 ) - delete[] priorOrd; - if ( actionOrd != 0 ) - delete[] actionOrd; - return FsmRes( FsmRes::Fsm(), rtnVal ); -} - -void FactorWithAug::makeNameTree( ParseData *pd ) -{ - /* Add the labels to the tree of instantiated names. Each label - * makes a new scope. */ - NameInst *prevNameInst = pd->curNameInst; - for ( size_t i = 0; i < labels.size(); i++ ) { - pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true ); - - if ( labels[i].cut ) - pd->curNameInst->numRefs += 1; - } - - /* Recurse, then pop the names. */ - factorWithRep->makeNameTree( pd ); - pd->curNameInst = prevNameInst; -} - - -void FactorWithAug::resolveNameRefs( ParseData *pd ) -{ - /* Enter into the name scope created by any labels. */ - NameFrame nameFrame = pd->enterNameScope( false, labels.size() ); - - /* Note action references. */ - for ( int i = 0; i < actions.length(); i++ ) - actions[i].action->embedRoots.append( pd->localNameScope ); - - /* Recurse first. IMPORTANT: we must do the exact same traversal as when - * the tree is constructed. 
*/ - factorWithRep->resolveNameRefs( pd ); - - /* Resolve epsilon transitions. */ - for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) { - /* Get the link. */ - EpsilonLink &link = epsilonLinks[ep]; - NameInst *resolvedName = 0; - - if ( link.target->length() == 1 && link.target->data[0] == "final" ) { - /* Epsilon drawn to an implicit final state. An implicit final is - * only available in join operations. */ - resolvedName = pd->localNameScope->final; - } - else { - /* Do an search for the name. */ - NameSet resolved; - pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 ); - if ( resolved.length() > 0 ) { - /* Take the first one. */ - resolvedName = resolved[0]; - if ( resolved.length() > 1 ) { - /* Complain about the multiple references. */ - pd->id->error(link.loc) << "state reference " << link.target << - " resolves to multiple entry points" << endl; - pd->errorStateLabels( resolved ); - } - } - } - - /* This is tricky, we stuff resolved epsilon transitions into one long - * vector in the parse data structure. Since the name resolution and - * graph generation both do identical walks of the parse tree we - * should always find the link resolutions in the right place. */ - pd->epsilonResolvedLinks.append( resolvedName ); - - if ( resolvedName != 0 ) { - /* Found the name, bump of the reference count on it. */ - resolvedName->numRefs += 1; - } - else { - /* Complain, no recovery action, the epsilon op will ignore any - * epsilon transitions whose names did not resolve. */ - pd->id->error(link.loc) << "could not resolve label " << link.target << endl; - } - } - - if ( labels.size() > 0 ) - pd->popNameScope( nameFrame ); -} - - -/* Clean up after a factor with repetition node. 
*/ -FactorWithRep::~FactorWithRep() -{ - switch ( type ) { - case StarType: case StarStarType: case OptionalType: case PlusType: - case ExactType: case MaxType: case MinType: case RangeType: - delete factorWithRep; - case FactorWithNegType: - delete factorWithNeg; - break; - } -} - - -/* Evaluate a factor with repetition node. */ -FsmRes FactorWithRep::walk( ParseData *pd ) -{ - switch ( type ) { - case StarType: { - /* Evaluate the FactorWithRep. */ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) - return factorTree; - - if ( factorTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying kleene star to a machine that " - "accepts zero length word" << endl; - factorTree.fsm->unsetFinState( factorTree.fsm->startState ); - } - - return FsmAp::starOp( factorTree.fsm ); - } - case StarStarType: { - /* Evaluate the FactorWithRep. */ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) - return factorTree; - - if ( factorTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying kleene star to a machine that " - "accepts zero length word" << endl; - } - - /* Set up the prior descs. All gets priority one, whereas leaving gets - * priority zero. Make a unique key so that these priorities don't - * interfere with any priorities set by the user. */ - priorDescs[0].key = pd->fsmCtx->nextPriorKey++; - priorDescs[0].priority = 1; - factorTree.fsm->allTransPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[0] ); - - /* Leaveing gets priority 0. Use same unique key. */ - priorDescs[1].key = priorDescs[0].key; - priorDescs[1].priority = 0; - factorTree.fsm->leaveFsmPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[1] ); - - return FsmAp::starOp( factorTree.fsm ); - } - case OptionalType: { - /* Evaluate the FactorWithRep. 
*/ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) - return factorTree; - - return FsmAp::questionOp( factorTree.fsm ); - } - case PlusType: { - /* Evaluate the FactorWithRep. */ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) - return factorTree; - - if ( factorTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying plus operator to a machine that " - "accepts zero length word" << endl; - } - - return FsmAp::plusOp( factorTree.fsm ); - } - case ExactType: { - /* Evaluate the first FactorWithRep. */ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) - return factorTree; - - /* Get an int from the repetition amount. */ - if ( lowerRep == 0 ) { - /* No copies. Don't need to evaluate the factorWithRep. - * This Defeats the purpose so give a warning. */ - pd->id->warning(loc) << "exactly zero repetitions results " - "in the null machine" << endl; - } - else { - if ( factorTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying repetition to a machine that " - "accepts zero length word" << endl; - } - } - - /* Handles the n == 0 case. */ - return FsmAp::exactRepeatOp( factorTree.fsm, lowerRep ); - } - case MaxType: { - /* Evaluate the first FactorWithRep. */ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) - return factorTree; - - /* Get an int from the repetition amount. */ - if ( upperRep == 0 ) { - /* No copies. Don't need to evaluate the factorWithRep. - * This Defeats the purpose so give a warning. */ - pd->id->warning(loc) << "max zero repetitions results " - "in the null machine" << endl; - - return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( pd->fsmCtx ) ); - } - else { - - if ( factorTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying max repetition to a machine that " - "accepts zero length word" << endl; - } - } - - /* Do the repetition on the machine. Handles the n == 0 case. 
*/ - return FsmAp::maxRepeatOp( factorTree.fsm, upperRep ); - } - case MinType: { - /* Evaluate the repeated machine. */ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) - return factorTree; - - if ( factorTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying min repetition to a machine that " - "accepts zero length word" << endl; - } - - return FsmAp::minRepeatOp( factorTree.fsm, lowerRep ); - } - case RangeType: { - /* Check for bogus range. */ - if ( upperRep - lowerRep < 0 ) { - pd->id->error(loc) << "invalid range repetition" << endl; - - /* Return null machine as recovery. */ - return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( pd->fsmCtx ) ); - } - - /* Now need to evaluate the repeated machine. */ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) - return factorTree; - - if ( lowerRep == 0 && upperRep == 0 ) { - /* No copies. Don't need to evaluate the factorWithRep. This - * defeats the purpose so give a warning. */ - pd->id->warning(loc) << "zero to zero repetitions results " - "in the null machine" << endl; - } - else { - - if ( factorTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying range repetition to a machine that " - "accepts zero length word" << endl; - } - - } - return FsmAp::rangeRepeatOp( factorTree.fsm, lowerRep, upperRep ); - } - case FactorWithNegType: { - /* Evaluate the Factor. Pass it up. 
*/ - return factorWithNeg->walk( pd ); - }} - return FsmRes( FsmRes::InternalError() ); -} - -void FactorWithRep::makeNameTree( ParseData *pd ) -{ - switch ( type ) { - case StarType: - case StarStarType: - case OptionalType: - case PlusType: - case ExactType: - case MaxType: - case MinType: - case RangeType: - factorWithRep->makeNameTree( pd ); - break; - case FactorWithNegType: - factorWithNeg->makeNameTree( pd ); - break; - } -} - -void FactorWithRep::resolveNameRefs( ParseData *pd ) -{ - switch ( type ) { - case StarType: - case StarStarType: - case OptionalType: - case PlusType: - case ExactType: - case MaxType: - case MinType: - case RangeType: - factorWithRep->resolveNameRefs( pd ); - break; - case FactorWithNegType: - factorWithNeg->resolveNameRefs( pd ); - break; - } -} - -/* Clean up after a factor with negation node. */ -FactorWithNeg::~FactorWithNeg() -{ - switch ( type ) { - case NegateType: - case CharNegateType: - delete factorWithNeg; - break; - case FactorType: - delete factor; - break; - } -} - -/* Evaluate a factor with negation node. */ -FsmRes FactorWithNeg::walk( ParseData *pd ) -{ - switch ( type ) { - case NegateType: { - /* Evaluate the factorWithNeg. */ - FsmRes toNegate = factorWithNeg->walk( pd ); - - /* Negation is subtract from dot-star. */ - FsmAp *ds = FsmAp::dotStarFsm( pd->fsmCtx ); - FsmRes res = FsmAp::subtractOp( ds, toNegate.fsm ); - - return res; - } - case CharNegateType: { - /* Evaluate the factorWithNeg. */ - FsmRes toNegate = factorWithNeg->walk( pd ); - - /* CharNegation is subtract from dot. */ - FsmAp *ds = FsmAp::dotFsm( pd->fsmCtx ); - FsmRes res = FsmAp::subtractOp( ds, toNegate.fsm ); - - return res; - } - case FactorType: { - /* Evaluate the Factor. Pass it up. 
*/ - return factor->walk( pd ); - }} - return FsmRes( FsmRes::InternalError() ); -} - -void FactorWithNeg::makeNameTree( ParseData *pd ) -{ - switch ( type ) { - case NegateType: - case CharNegateType: - factorWithNeg->makeNameTree( pd ); - break; - case FactorType: - factor->makeNameTree( pd ); - break; - } -} - -void FactorWithNeg::resolveNameRefs( ParseData *pd ) -{ - switch ( type ) { - case NegateType: - case CharNegateType: - factorWithNeg->resolveNameRefs( pd ); - break; - case FactorType: - factor->resolveNameRefs( pd ); - break; - } -} - -/* Clean up after a factor node. */ -Factor::~Factor() -{ - switch ( type ) { - case LiteralType: - delete literal; - break; - case RangeType: - delete range; - break; - case OrExprType: - delete reItem; - break; - case RegExprType: - delete regExpr; - break; - case ReferenceType: - break; - case ParenType: - delete join; - break; - case LongestMatchType: - delete longestMatch; - break; - case NfaWrap: case NfaRep: - case CondStar: case CondPlus: - delete expression; - break; - } -} - - -/* Evaluate a factor node. 
*/ -FsmRes Factor::walk( ParseData *pd ) -{ - switch ( type ) { - case LiteralType: - return FsmRes( FsmRes::Fsm(), literal->walk( pd ) ); - case RangeType: - return FsmRes( FsmRes::Fsm(), range->walk( pd ) ); - case OrExprType: - return reItem->walk( pd, 0 ); - case RegExprType: - return FsmRes( FsmRes::Fsm(), regExpr->walk( pd, 0 ) ); - case ReferenceType: - return varDef->walk( pd ); - case ParenType: - return join->walk( pd ); - case LongestMatchType: - return longestMatch->walk( pd ); - case NfaRep: { - FsmRes exprTree = expression->walk( pd ); - - if ( mode == Factor::NfaLegacy ) { - FsmRes res = FsmAp::nfaRepeatOp( exprTree.fsm, action1, action2, action3, - action4, action5, action6 ); - - res.fsm->verifyIntegrity(); - return res; - } - else if ( mode == Factor::NfaLazy ) { - FsmRes res = FsmAp::nfaRepeatOp2( exprTree.fsm, action1, action2, action3, - action4, action5, action6, FsmAp::NfaLazy ); - - res.fsm->verifyIntegrity(); - return res; - } - else { - FsmRes res = FsmAp::nfaRepeatOp2( exprTree.fsm, action1, action2, action3, - action4, action5, action6, FsmAp::NfaGreedy ); - - res.fsm->verifyIntegrity(); - return res; - } - } - case NfaWrap: { - FsmRes exprTree = expression->walk( pd ); - if ( mode == Factor::NfaLazy ) { - FsmRes res = FsmAp::nfaWrap( exprTree.fsm, action1, action2, action3, - action4, /* action5, */ action6, FsmAp::NfaLazy ); - - res.fsm->verifyIntegrity(); - return res; - } - else { - FsmRes res = FsmAp::nfaWrap( exprTree.fsm, action1, action2, action3, - action4, /* action5, */ action6, FsmAp::NfaGreedy ); - - res.fsm->verifyIntegrity(); - return res; - } - } - case CondStar: { - FsmRes exprTree = expression->walk( pd ); - if ( !exprTree.success() ) - return exprTree; - - if ( exprTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying plus operator to a machine that " - "accepts zero length word" << endl; - } - - return FsmAp::condStar( exprTree.fsm, repId, action1, action2, action3, action4 ); - } - case CondPlus: 
{ - FsmRes exprTree = expression->walk( pd ); - if ( !exprTree.success() ) - return exprTree; - - if ( exprTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying plus operator to a machine that " - "accepts zero length word" << endl; - } - - return FsmAp::condPlus( exprTree.fsm, repId, action1, action2, action3, action4 ); - }} - - return FsmRes( FsmRes::InternalError() ); -} - -void Factor::makeNameTree( ParseData *pd ) -{ - switch ( type ) { - case LiteralType: - case RangeType: - case OrExprType: - case RegExprType: - break; - case ReferenceType: - varDef->makeNameTree( loc, pd ); - break; - case ParenType: - join->makeNameTree( pd ); - break; - case LongestMatchType: - longestMatch->makeNameTree( pd ); - break; - case NfaWrap: - case NfaRep: - case CondStar: - case CondPlus: - expression->makeNameTree( pd ); - break; - } -} - -void Factor::resolveNameRefs( ParseData *pd ) -{ - switch ( type ) { - case LiteralType: - case RangeType: - case OrExprType: - case RegExprType: - break; - case ReferenceType: - varDef->resolveNameRefs( pd ); - break; - case ParenType: - join->resolveNameRefs( pd ); - break; - case LongestMatchType: - longestMatch->resolveNameRefs( pd ); - break; - case NfaRep: - case NfaWrap: - case CondStar: - case CondPlus: - expression->resolveNameRefs( pd ); - break; - } -} - -/* Clean up a range object. Must delete the two literals. */ -Range::~Range() -{ - delete lowerLit; - delete upperLit; -} - -/* Evaluate a range. Gets the lower an upper key and makes an fsm range. */ -FsmAp *Range::walk( ParseData *pd ) -{ - /* Construct and verify the suitability of the lower end of the range. */ - FsmAp *lowerFsm = lowerLit->walk( pd ); - if ( !lowerFsm->checkSingleCharMachine() ) { - pd->id->error(lowerLit->loc) << - "bad range lower end, must be a single character" << endl; - } - - /* Construct and verify the upper end. 
*/ - FsmAp *upperFsm = upperLit->walk( pd ); - if ( !upperFsm->checkSingleCharMachine() ) { - pd->id->error(upperLit->loc) << - "bad range upper end, must be a single character" << endl; - } - - /* Grab the keys from the machines, then delete them. */ - Key lowKey = lowerFsm->startState->outList.head->lowKey; - Key highKey = upperFsm->startState->outList.head->lowKey; - delete lowerFsm; - delete upperFsm; - - /* Validate the range. */ - if ( pd->fsmCtx->keyOps->gt( lowKey, highKey ) ) { - /* Recover by setting upper to lower; */ - pd->id->error(lowerLit->loc) << "lower end of range is greater then upper end" << endl; - highKey = lowKey; - } - - /* Return the range now that it is validated. */ - FsmAp *retFsm; - if ( caseIndep ) - retFsm = FsmAp::rangeFsmCI( pd->fsmCtx, lowKey, highKey ); - else - retFsm = FsmAp::rangeFsm( pd->fsmCtx, lowKey, highKey ); - - return retFsm; -} - -/* Evaluate a literal object. */ -FsmAp *Literal::walk( ParseData *pd ) -{ - /* FsmAp to return, is the alphabet signed. */ - FsmAp *rtnVal = 0; - - switch ( type ) { - case Number: { - /* Make a C string. Maybe put - up front. */ - Vector num = data; - if ( neg ) - num.insert( 0, '-' ); - num.append( 0 ); - - /* Make the fsm key in int format. */ - Key fsmKey = makeFsmKeyNum( num.data, loc, pd ); - - /* Make the new machine. */ - rtnVal = FsmAp::concatFsm( pd->fsmCtx, fsmKey ); - break; - } - case LitString: { - /* Make the array of keys in int format. */ - long length; - bool caseInsensitive; - char *litstr = prepareLitString( pd->id, loc, data.data, data.length(), - length, caseInsensitive ); - Key *arr = new Key[length]; - makeFsmKeyArray( arr, litstr, length, pd ); - - /* Make the new machine. 
*/ - if ( caseInsensitive ) - rtnVal = FsmAp::concatFsmCI( pd->fsmCtx, arr, length ); - else - rtnVal = FsmAp::concatFsm( pd->fsmCtx, arr, length ); - delete[] litstr; - delete[] arr; - break; - } - case HexString: { - long length; - Key *arr = prepareHexString( pd, loc, data.data, data.length(), length ); - rtnVal = FsmAp::concatFsm( pd->fsmCtx, arr, length ); - delete[] arr; - break; - }} - return rtnVal; -} - -/* Clean up after a regular expression object. */ -RegExpr::~RegExpr() -{ - switch ( type ) { - case RecurseItem: - delete regExpr; - delete item; - break; - case Empty: - break; - } -} - -/* Evaluate a regular expression object. */ -FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex ) -{ - /* This is the root regex, pass down a pointer to this. */ - if ( rootRegex == 0 ) - rootRegex = this; - - FsmAp *rtnVal = 0; - switch ( type ) { - case RecurseItem: { - /* Walk both items. */ - rtnVal = regExpr->walk( pd, rootRegex ); - FsmRes fsm2 = item->walk( pd, rootRegex ); - FsmRes res = FsmAp::concatOp( rtnVal, fsm2.fsm ); - rtnVal = res.fsm; - break; - } - case Empty: { - rtnVal = FsmAp::lambdaFsm( pd->fsmCtx ); - break; - } - } - return rtnVal; -} - -/* Clean up after an item in a regular expression. */ -ReItem::~ReItem() -{ - switch ( type ) { - case Data: - case Dot: - break; - case OrBlock: - case NegOrBlock: - delete orBlock; - break; - } -} - -/* Evaluate a regular expression object. */ -FsmRes ReItem::walk( ParseData *pd, RegExpr *rootRegex ) -{ - /* The fsm to return, is the alphabet signed? */ - FsmAp *rtnVal = 0; - - switch ( type ) { - case Data: { - /* Move the data into an integer array and make a concat fsm. */ - Key *arr = new Key[data.length()]; - makeFsmKeyArray( arr, data.data, data.length(), pd ); - - /* Make the concat fsm. 
*/ - if ( rootRegex != 0 && rootRegex->caseInsensitive ) - rtnVal = FsmAp::concatFsmCI( pd->fsmCtx, arr, data.length() ); - else - rtnVal = FsmAp::concatFsm( pd->fsmCtx, arr, data.length() ); - delete[] arr; - break; - } - case Dot: { - /* Make the dot fsm. */ - rtnVal = FsmAp::dotFsm( pd->fsmCtx ); - break; - } - case OrBlock: { - /* Get the or block and minmize it. */ - rtnVal = orBlock->walk( pd, rootRegex ); - if ( rtnVal == 0 ) - rtnVal = FsmAp::lambdaFsm( pd->fsmCtx ); - rtnVal->minimizePartition2(); - break; - } - case NegOrBlock: { - /* Get the or block and minimize it. */ - FsmAp *fsm = orBlock->walk( pd, rootRegex ); - fsm->minimizePartition2(); - - /* Make a dot fsm and subtract from it. */ - rtnVal = FsmAp::dotFsm( pd->fsmCtx ); - FsmRes res = FsmAp::subtractOp( rtnVal, fsm ); - rtnVal = res.fsm; - rtnVal->minimizePartition2(); - break; - } - } - - /* If the item is followed by a star, then apply the star op. */ - if ( star ) { - if ( rtnVal->startState->isFinState() ) { - pd->id->warning(loc) << "applying kleene star to a machine that " - "accepts zero length word" << endl; - } - - FsmRes res = FsmAp::starOp( rtnVal ); - rtnVal = res.fsm; - rtnVal->minimizePartition2(); - } - - return FsmRes( FsmRes::Fsm(), rtnVal ); -} - -/* Clean up after an or block of a regular expression. */ -ReOrBlock::~ReOrBlock() -{ - switch ( type ) { - case RecurseItem: - delete orBlock; - delete item; - break; - case Empty: - break; - } -} - - -/* Evaluate an or block of a regular expression. */ -FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex ) -{ - FsmAp *rtnVal = 0; - switch ( type ) { - case RecurseItem: { - /* Evaluate the two fsm. 
*/ - FsmAp *fsm1 = orBlock->walk( pd, rootRegex ); - FsmAp *fsm2 = item->walk( pd, rootRegex ); - if ( fsm1 == 0 ) - rtnVal = fsm2; - else { - FsmRes res = FsmAp::unionOp( fsm1, fsm2 ); - fsm1 = res.fsm; - rtnVal = fsm1; - } - break; - } - case Empty: { - rtnVal = 0; - break; - } - } - return rtnVal;; -} - -/* Evaluate an or block item of a regular expression. */ -FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex ) -{ - KeyOps *keyOps = pd->fsmCtx->keyOps; - - /* The return value, is the alphabet signed? */ - FsmAp *rtnVal = 0; - switch ( type ) { - case Data: { - /* Put the or data into an array of ints. Note that we find unique - * keys. Duplicates are silently ignored. The alternative would be to - * issue warning or an error but since we can't with [a0-9a] or 'a' | - * 'a' don't bother here. */ - KeySet keySet( keyOps ); - makeFsmUniqueKeyArray( keySet, data.data, data.length(), - rootRegex != 0 ? rootRegex->caseInsensitive : false, pd ); - - /* Run the or operator. */ - rtnVal = FsmAp::orFsm( pd->fsmCtx, keySet.data, keySet.length() ); - break; - } - case Range: { - /* Make the upper and lower keys. */ - Key lowKey = makeFsmKeyChar( lower, pd ); - Key highKey = makeFsmKeyChar( upper, pd ); - - /* Validate the range. */ - if ( keyOps->gt( lowKey, highKey ) ) { - /* Recover by setting upper to lower; */ - pd->id->error(loc) << "lower end of range is greater then upper end" << endl; - highKey = lowKey; - } - - /* Make the range machine. */ - rtnVal = FsmAp::rangeFsm( pd->fsmCtx, lowKey, highKey ); - - if ( rootRegex != 0 && rootRegex->caseInsensitive ) { - if ( keyOps->le( lowKey, 'Z' ) && pd->fsmCtx->keyOps->le( 'A', highKey ) ) { - Key otherLow = keyOps->lt( lowKey, 'A' ) ? Key('A') : lowKey; - Key otherHigh = keyOps->lt( 'Z', highKey ) ? 
Key('Z') : highKey; - - otherLow = keyOps->add( 'a', ( keyOps->sub( otherLow, 'A' ) ) ); - otherHigh = keyOps->add( 'a', ( keyOps->sub( otherHigh, 'A' ) ) ); - - FsmAp *otherRange = FsmAp::rangeFsm( pd->fsmCtx, otherLow, otherHigh ); - FsmRes res = FsmAp::unionOp( rtnVal, otherRange ); - rtnVal = res.fsm; - rtnVal->minimizePartition2(); - } - else if ( keyOps->le( lowKey, 'z' ) && keyOps->le( 'a', highKey ) ) { - Key otherLow = keyOps->lt( lowKey, 'a' ) ? Key('a') : lowKey; - Key otherHigh = keyOps->lt( 'z', highKey ) ? Key('z') : highKey; - - otherLow = keyOps->add('A' , ( keyOps->sub( otherLow , 'a' ) )); - otherHigh = keyOps->add('A' , ( keyOps->sub( otherHigh , 'a' ) )); - - FsmAp *otherRange = FsmAp::rangeFsm( pd->fsmCtx, otherLow, otherHigh ); - FsmRes res = FsmAp::unionOp( rtnVal, otherRange ); - rtnVal = res.fsm; - rtnVal->minimizePartition2(); - } - } - - break; - }} - return rtnVal; -} diff --git a/ragel/rlparse.kh b/ragel/rlparse.kh deleted file mode 100644 index e077d6a2..00000000 --- a/ragel/rlparse.kh +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright 2001-2007 Adrian Thurston - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _RLPARSE_H -#define _RLPARSE_H - -#include -#include "avltree.h" -#include "parsedata.h" - - -/* Import scanner tokens. */ -#define IMP_Word 128 -#define IMP_Literal 129 -#define IMP_UInt 130 -#define IMP_Define 131 - -struct ParamList; - -struct TokHead -{ - TokHead *next; -}; - -struct Parser6 -{ -%%{ - parser Parser6; - - # General tokens. - token TK_Word, TK_Literal, TK_EndSection, TK_UInt, TK_Hex, - TK_Word, TK_Literal, TK_DotDot, TK_ColonGt, TK_ColonGtGt, TK_LtColon, - TK_Arrow, TK_DoubleArrow, TK_StarStar, TK_ColonEquals, TK_BarEquals, - TK_NameSep, TK_BarStar, TK_DashDash, TK_DotDotIndep; - - # Conditions. - token TK_StartCond, TK_AllCond, TK_LeavingCond; - - # State embedding actions. - token TK_Middle; - - # Global error actions. - token TK_StartGblError, TK_AllGblError, TK_FinalGblError, - TK_NotFinalGblError, TK_NotStartGblError, TK_MiddleGblError; - - # Local error actions. - token TK_StartLocalError, TK_AllLocalError, TK_FinalLocalError, - TK_NotFinalLocalError, TK_NotStartLocalError, TK_MiddleLocalError; - - # EOF Action embedding. - token TK_StartEOF, TK_AllEOF, TK_FinalEOF, TK_NotFinalEOF, TK_NotStartEOF, - TK_MiddleEOF; - - # To State Actions. - token TK_StartToState, TK_AllToState, TK_FinalToState, TK_NotFinalToState, - TK_NotStartToState, TK_MiddleToState; - - # In State Actions. - token TK_StartFromState, TK_AllFromState, TK_FinalFromState, - TK_NotFinalFromState, TK_NotStartFromState, TK_MiddleFromState; - - token TK_ColonNfaOpen, TK_CloseColon, TK_ColonCondOpen, - TK_ColonCondStarOpen, TK_ColonCondPlusOpen, TK_ColonNoMaxOpen; - - # Regular expression tokens. 
*/ - token RE_Slash, RE_SqOpen, RE_SqOpenNeg, RE_SqClose, RE_Dot, RE_Star, - RE_Dash, RE_Char; - - # Tokens specific to inline code. - token IL_WhiteSpace, IL_Comment, IL_Literal, IL_Symbol; - - # Keywords. - token KW_Machine, KW_Include, KW_Import, KW_Write, KW_Action, KW_AlphType, - KW_Range, KW_GetKey, KW_Include, KW_Write, KW_Machine, KW_InWhen, - KW_When, KW_OutWhen, KW_Eof, KW_Err, KW_Lerr, KW_To, KW_From, - KW_Export, KW_PrePush, KW_PostPop, KW_Length, KW_NfaPrePush, KW_NfaPostPop; - - # Specials in code blocks. - token KW_Break, KW_Exec, KW_Hold, KW_PChar, KW_Char, KW_Goto, KW_Call, - KW_Ret, KW_CurState, KW_TargState, KW_Entry, KW_Next, KW_Exec, - KW_Variable, KW_Access, KW_Ncall, KW_Nret, KW_Nbreak; - - token TK_SubstRef; -}%% - - %% write instance_data; - - void init(); - int parseLangEl( int type, const Token *token ); - void clear(); - - Parser6( InputData *id, const char *fileName, char *sectionName, - const InputLoc §ionLoc, const HostLang *hostLang, - MinimizeLevel minimizeLevel, - MinimizeOpt minimizeOpt ); - - int token( InputLoc &loc, int tokId, char *tokstart, int toklen ); - void tryMachineDef( const InputLoc &loc, char *name, - MachineDef *machineDef, bool isInstance ); - - /* Report an error encountered by the parser. */ - ostream &parse_error( int tokId, Token &token ); - - ParseData *pd; - - /* The name of the root section, this does not change during an include. 
*/ - char *sectionName; - const HostLang *hostLang; - - NameRef nameRef; - NameRefList nameRefList; - - Vector exportContext; - - TokHead *tokHead; - ActionParamList *paramList; - - Parser6 *prev, *next; - - void terminateParser(); - - bool parseSubstitutions; -}; - -%% write token_defs; - -void clearTokdata( Parser6 *parser ); - -#endif diff --git a/ragel/rlparse.kl b/ragel/rlparse.kl deleted file mode 100644 index 7f69ab3e..00000000 --- a/ragel/rlparse.kl +++ /dev/null @@ -1,1943 +0,0 @@ -/* - * Copyright 2001-2016 Adrian Thurston - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "rlparse.h" -#include "ragel.h" -#include "inputdata.h" -#include -#include -#include - -using std::endl; - -Parser6::Parser6( InputData *id, const char *fileName, char *sectionName, - const InputLoc §ionLoc, const HostLang *hostLang, - MinimizeLevel minimizeLevel, - MinimizeOpt minimizeOpt ) -: - sectionName(sectionName), - hostLang(hostLang), - tokHead(0), - parseSubstitutions(false) -{ - pd = new ParseData( id, std::string(sectionName), - id->nextMachineId++, sectionLoc, hostLang, minimizeLevel, minimizeOpt ); - exportContext.append( false ); - - pd->includeHistory.push_back( IncludeHistoryItem( fileName, sectionName ) ); -} - -%%{ - -parser Parser6; - -include "rlparse.kh"; - -start: section_list; - -section_list: section_list statement_list TK_EndSection; -section_list: ; - -statement_list: statement_list statement; -statement_list: ; - -statement: assignment commit; -statement: instantiation commit; -statement: nfa_union commit; -statement: action_spec commit; -statement: alphtype_spec commit; -statement: range_spec commit; -statement: getkey_spec commit; -statement: access_spec commit; -statement: variable_spec commit; -statement: export_block commit; -statement: pre_push_spec commit; -statement: post_pop_spec commit; -statement: nfa_pre_push_spec commit; -statement: nfa_post_pop_spec commit; -statement: length_spec commit; - -length_spec: - KW_Length TK_Word ';' - final { - LengthDef *lengthDef = new LengthDef( $2->data ); - pd->lengthDefList.append( lengthDef ); - - /* Generic creation of machine for instantiation and assignment. */ - MachineDef *machineDef = new MachineDef( lengthDef ); - tryMachineDef( $2->loc, $2->data, machineDef, false ); - }; - -pre_push_spec: - KW_PrePush '{' inline_block '}' - final { - if ( pd->fsmCtx->prePushExpr != 0 ) { - /* Recover by just ignoring the duplicate. 
*/ - pd->id->error($2->loc) << "pre_push code already defined" << endl; - } - - pd->fsmCtx->prePushExpr = new InlineBlock( $2->loc, $3->inlineList ); - }; - - -post_pop_spec: - KW_PostPop '{' inline_block '}' - final { - if ( pd->fsmCtx->postPopExpr != 0 ) { - /* Recover by just ignoring the duplicate. */ - pd->id->error($2->loc) << "post_pop code already defined" << endl; - } - - pd->fsmCtx->postPopExpr = new InlineBlock( $2->loc, $3->inlineList ); - }; - -nfa_pre_push_spec: - KW_NfaPrePush '{' inline_block '}' - final { - if ( pd->fsmCtx->nfaPrePushExpr != 0 ) { - /* Recover by just ignoring the duplicate. */ - pd->id->error($2->loc) << "nfa_pre_push code already defined" << endl; - } - - pd->fsmCtx->nfaPrePushExpr = new InlineBlock( $2->loc, $3->inlineList ); - }; - -nfa_post_pop_spec: - KW_NfaPostPop '{' inline_block '}' - final { - if ( pd->fsmCtx->nfaPostPopExpr != 0 ) { - /* Recover by just ignoring the duplicate. */ - pd->id->error($2->loc) << "nfa_post_pop code already defined" << endl; - } - - pd->fsmCtx->nfaPostPopExpr = new InlineBlock( $2->loc, $3->inlineList ); - }; - -export_open: KW_Export - final { - exportContext.append( true ); - }; - -nonterm opt_export -{ - bool isSet; -}; - -opt_export: export_open final { $$->isSet = true; }; -opt_export: final { $$->isSet = false; }; - -export_block: export_open '{' statement_list '}' - final { - exportContext.remove( exportContext.length()-1 ); - }; - -assignment: - opt_export machine_name '=' join ';' final { - /* Main machine must be an instance. */ - bool isInstance = false; - if ( strcmp($2->token.data, mainMachine) == 0 ) { - pd->id->warning($2->token.loc) << - "main machine will be implicitly instantiated" << endl; - isInstance = true; - } - - /* Generic creation of machine for instantiation and assignment. 
*/ - MachineDef *machineDef = new MachineDef( $4->join ); - tryMachineDef( $2->token.loc, $2->token.data, machineDef, isInstance ); - - if ( $1->isSet ) - exportContext.remove( exportContext.length()-1 ); - - $4->join->loc = $3->loc; - }; - -instantiation: - opt_export machine_name TK_ColonEquals join_or_lm ';' final { - /* Generic creation of machine for instantiation and assignment. */ - tryMachineDef( $2->token.loc, $2->token.data, $4->machineDef, true ); - - if ( $1->isSet ) - exportContext.remove( exportContext.length()-1 ); - - /* Pass a location to join_or_lm */ - if ( $4->machineDef->join != 0 ) - $4->machineDef->join->loc = $3->loc; - }; - -nonterm nfa_round_spec -{ - long depth; - long grouping; -}; - -nfa_round_spec: - TK_UInt ',' TK_UInt - final { - // Convert the priority number to a long. Check for overflow. - errno = 0; - $$->depth = strtol( $1->data, 0, 10 ); - if ( $$->depth == LONG_MAX && errno == ERANGE ) - pd->id->error($1->loc) << "rounds " << $1->data << " overflows" << endl; - - $$->grouping = strtol( $3->data, 0, 10 ); - if ( $$->grouping == LONG_MAX && errno == ERANGE ) - pd->id->error($3->loc) << "grouping " << $3->data << " overflows" << endl; - }; - -nonterm nfa_round_list -{ - NfaRoundVect *roundsList; -}; - -nfa_round_list: - nfa_round_list ',' nfa_round_spec - final { - $$->roundsList = $1->roundsList; - $$->roundsList->append( - NfaRound( $3->depth, $3->grouping ) ); - }; - -nfa_round_list: - nfa_round_spec - final { - $$->roundsList = new NfaRoundVect; - $$->roundsList->append( - NfaRound( $1->depth, $1->grouping ) ); - }; - -nonterm nfa_rounds -{ - NfaRoundVect *roundsList; -}; - -nfa_rounds: - '(' nfa_round_list ')' - final { - $$->roundsList = $2->roundsList; - }; - -nonterm nfa_expr -{ - NfaUnion *nfaUnion; -}; - -nfa_expr: - nfa_expr '|' term_short final { - $$->nfaUnion = $1->nfaUnion; - $$->nfaUnion->terms.append( $3->term ); - }; -nfa_expr: - term_short final { - $$->nfaUnion = new NfaUnion(); - $$->nfaUnion->terms.append( 
$1->term ); - }; - -nfa_union: - machine_name TK_BarEquals nfa_rounds nfa_expr ';' final { - $4->nfaUnion->roundsList = $3->roundsList; - MachineDef *machineDef = new MachineDef( $4->nfaUnion ); - - /* Generic creation of machine for instantiation and assignment. */ - tryMachineDef( $1->token.loc, $1->token.data, machineDef, true ); - }; - - -type token_type -{ - Token token; -}; - -nonterm machine_name uses token_type; - -machine_name: - TK_Word final { - /* Make/get the priority key. The name may have already been referenced - * and therefore exist. */ - PriorDictEl *priorDictEl; - if ( pd->priorDict.insert( $1->data, pd->fsmCtx->nextPriorKey, &priorDictEl ) ) - pd->fsmCtx->nextPriorKey += 1; - pd->curDefPriorKey = priorDictEl->value; - - /* Make/get the local error key. */ - LocalErrDictEl *localErrDictEl; - if ( pd->localErrDict.insert( $1->data, pd->nextLocalErrKey, &localErrDictEl ) ) - pd->nextLocalErrKey += 1; - pd->curDefLocalErrKey = localErrDictEl->value; - - $$->token = *$1; - }; - -nonterm action_param -{ - ActionParam *param; -}; - -action_param: - TK_Word - final { - $$->param = new ActionParam( $1->data ); - }; - -nonterm action_param_list -{ - ActionParamList *paramList; -}; - -action_param_list: - action_param_list ',' action_param - final { - $$->paramList = $1->paramList; - $$->paramList->append( $3->param ); - }; - -action_param_list: - action_param - final { - $$->paramList = new ActionParamList; - $$->paramList->append( $1->param ); - }; - -nonterm opt_action_param_list uses action_param_list; - -opt_action_param_list: - action_param_list - final { - $$->paramList = $1->paramList; - }; - -opt_action_param_list: - final { - $$->paramList = new ActionParamList; - }; - -nonterm opt_action_params uses action_param_list; - -opt_action_params: - '(' opt_action_param_list ')' - try { - parseSubstitutions = true; - } - final { - $$->paramList = $2->paramList; - paramList = $2->paramList; - }; - -opt_action_params: - final { - $$->paramList = 0; - }; 
- -action_spec: - KW_Action TK_Word opt_action_params '{' inline_block '}' - final { - if ( pd->actionDict.find( $2->data ) ) { - /* Recover by just ignoring the duplicate. */ - pd->id->error($2->loc) << "action \"" << $2->data << "\" already defined" << endl; - } - else { - /* Add the action to the list of actions. */ - Action *newAction = new Action( $4->loc, $2->data, - $5->inlineList, pd->fsmCtx->nextCondId++ ); - - /* Insert to list and dict. */ - pd->fsmCtx->actionList.append( newAction ); - pd->actionDict.insert( newAction ); - - newAction->paramList = $3->paramList; - if ( $3->paramList != 0 ) - newAction->argListMap = new ActionArgListMap; - } - parseSubstitutions = false; - }; - -# Specifies the data type of the input alphabet. One or two words followed by a -# semi-colon. -alphtype_spec: - KW_AlphType TK_Word TK_Word ';' final { - if ( ! pd->setAlphType( $1->loc, hostLang, $2->data, $3->data ) ) { - // Recover by ignoring the alphtype statement. - pd->id->error($2->loc) << "\"" << $2->data << - " " << $3->data << "\" is not a valid alphabet type" << endl; - } - }; - -alphtype_spec: - KW_AlphType TK_Word ';' final { - if ( ! pd->setAlphType( $1->loc, hostLang, $2->data ) ) { - // Recover by ignoring the alphtype statement. - pd->id->error($2->loc) << "\"" << $2->data << - "\" is not a valid alphabet type" << endl; - } - }; - -# Specifies a range to assume that the input characters will fall into. -range_spec: - KW_Range alphabet_num alphabet_num ';' final { - // Save the upper and lower ends of the range and emit the line number. 
- pd->lowerNum = $2->token.data; - pd->upperNum = $3->token.data; - pd->rangeLowLoc = $2->token.loc; - pd->rangeHighLoc = $3->token.loc; - }; - -getkey_spec: - KW_GetKey inline_expr ';' final { - pd->fsmCtx->getKeyExpr = $2->inlineList; - }; - -access_spec: - KW_Access inline_expr ';' final { - pd->fsmCtx->accessExpr = $2->inlineList; - }; - -variable_spec: - KW_Variable opt_whitespace TK_Word inline_expr ';' final { - /* FIXME: Need to implement the rest of this. */ - bool wasSet = pd->setVariable( $3->data, $4->inlineList ); - if ( !wasSet ) - pd->id->error($3->loc) << "bad variable name" << endl; - }; - -opt_whitespace: opt_whitespace IL_WhiteSpace; -opt_whitespace: ; - -# -# Expressions -# - -nonterm join_or_lm -{ - MachineDef *machineDef; -}; - -join_or_lm: - join final { - $$->machineDef = new MachineDef( $1->join ); - }; -join_or_lm: - TK_BarStar lm_part_list '*' '|' final { - /* Create a new factor going to a longest match structure. Record - * in the parse data that we have a longest match. */ - LongestMatch *lm = new LongestMatch( $1->loc, $2->lmPartList ); - pd->lmList.append( lm ); - for ( LmPartList::Iter lmp = *($2->lmPartList); lmp.lte(); lmp++ ) - lmp->longestMatch = lm; - $$->machineDef = new MachineDef( lm ); - }; - -nonterm lm_part_list -{ - LmPartList *lmPartList; -}; - -lm_part_list: - lm_part_list longest_match_part - final { - if ( $2->lmPart != 0 ) - $1->lmPartList->append( $2->lmPart ); - $$->lmPartList = $1->lmPartList; - }; -lm_part_list: - longest_match_part - final { - /* Create a new list with the part. 
*/ - $$->lmPartList = new LmPartList; - if ( $1->lmPart != 0 ) - $$->lmPartList->append( $1->lmPart ); - }; - -nonterm longest_match_part -{ - LongestMatchPart *lmPart; -}; - -longest_match_part: - action_spec commit - final { - $$->lmPart = 0; - }; -longest_match_part: - assignment commit - final { - $$->lmPart = 0; - }; -longest_match_part: - join opt_lm_part_action ';' commit - final { - $$->lmPart = 0; - Action *action = $2->action; - if ( action != 0 ) - action->isLmAction = true; - $$->lmPart = new LongestMatchPart( $1->join, action, - $3->loc, pd->nextLongestMatchId++ ); - - /* Provide a location to join. Unfortunately We don't - * have the start of the join as in other occurances. Use the end. */ - $1->join->loc = $3->loc; - }; - -nonterm opt_lm_part_action -{ - Action *action; -}; - -opt_lm_part_action: - TK_DoubleArrow action_embed final { - $$->action = $2->action; - }; -opt_lm_part_action: - action_embed_block final { - $$->action = $1->action; - }; -opt_lm_part_action: - final { - $$->action = 0; - }; - - -nonterm join -{ - Join *join; -}; - -join: - join ',' expression final { - /* Append the expression to the list and return it. 
*/ - $1->join->exprList.append( $3->expression ); - $$->join = $1->join; - }; -join: - expression final { - $$->join = new Join( $1->expression ); - }; - -nonterm expression -{ - Expression *expression; -}; - -expression: - expression '|' term_short final { - $$->expression = new Expression( $1->expression, - $3->term, Expression::OrType ); - }; -expression: - expression '&' term_short final { - $$->expression = new Expression( $1->expression, - $3->term, Expression::IntersectType ); - }; -expression: - expression '-' term_short final { - $$->expression = new Expression( $1->expression, - $3->term, Expression::SubtractType ); - }; -expression: - expression TK_DashDash term_short final { - $$->expression = new Expression( $1->expression, - $3->term, Expression::StrongSubtractType ); - }; -expression: - term_short final { - $$->expression = new Expression( $1->term ); - }; - -# This is where we resolve the ambiguity involving -. By default ragel tries to -# do a longest match, which gives precedence to a concatenation because it is -# innermost. What we need is to force term into a shortest match so that when - -# is seen it doesn't try to extend term with a concatenation, but ends term and -# goes for a subtraction. -# -# The shortest tag overrides the default longest match action ordering strategy -# and instead forces a shortest match stragegy. The wrap the term production in -# a new nonterminal 'term_short' to guarantee the shortest match behaviour. - -shortest term_short; -nonterm term_short -{ - Term *term; -}; - -term_short: - term final { - $$->term = $1->term; - }; - -nonterm term -{ - Term *term; -}; - -term: - term factor_with_label final { - $$->term = new Term( $1->term, $2->factorWithAug ); - }; -term: - term '.' 
factor_with_label final { - $$->term = new Term( $1->term, $3->factorWithAug ); - }; -term: - term TK_ColonGt factor_with_label final { - $$->term = new Term( $1->term, $3->factorWithAug, Term::RightStartType ); - }; -term: - term TK_ColonGtGt factor_with_label final { - $$->term = new Term( $1->term, $3->factorWithAug, Term::RightFinishType ); - }; -term: - term TK_LtColon factor_with_label final { - $$->term = new Term( $1->term, - $3->factorWithAug, Term::LeftType ); - }; -term: - factor_with_label final { - $$->term = new Term( $1->factorWithAug ); - }; - -nonterm factor_with_label -{ - FactorWithAug *factorWithAug; -}; - -factor_with_label: - TK_Word ':' factor_with_label final { - /* Add the label to the list and pass the factor up. */ - $3->factorWithAug->labels.insert( $3->factorWithAug->labels.begin(), Label($1->loc, $1->data) ); - $$->factorWithAug = $3->factorWithAug; - }; -factor_with_label: - factor_with_ep final { - $$->factorWithAug = $1->factorWithAug; - }; - -nonterm factor_with_ep -{ - FactorWithAug *factorWithAug; -}; - -factor_with_ep: - factor_with_ep TK_Arrow local_state_ref final { - /* Add the target to the list and return the factor object. */ - $1->factorWithAug->epsilonLinks.append( EpsilonLink( $2->loc, new NameRef(nameRef) ) ); - $$->factorWithAug = $1->factorWithAug; - }; -factor_with_ep: - factor_with_aug final { - $$->factorWithAug = $1->factorWithAug; - }; - -nonterm factor_with_aug -{ - FactorWithAug *factorWithAug; -}; - -factor_with_aug: - factor_with_aug aug_type_base action_embed final { - /* Append the action to the factorWithAug, record the refernce from - * factorWithAug to the action and pass up the factorWithAug. */ - $1->factorWithAug->actions.append( - ParserAction( $2->loc, $2->augType, 0, $3->action ) ); - $$->factorWithAug = $1->factorWithAug; - }; -factor_with_aug: - factor_with_aug aug_type_base priority_aug final { - /* Append the named priority to the factorWithAug and pass it up. 
*/ - $1->factorWithAug->priorityAugs.append( - PriorityAug( $2->augType, pd->curDefPriorKey, $3->priorityNum ) ); - $$->factorWithAug = $1->factorWithAug; - }; -factor_with_aug: - factor_with_aug aug_type_base '(' priority_name ',' priority_aug ')' final { - /* Append the priority using a default name. */ - $1->factorWithAug->priorityAugs.append( - PriorityAug( $2->augType, $4->priorityName, $6->priorityNum ) ); - $$->factorWithAug = $1->factorWithAug; - }; -factor_with_aug: - factor_with_aug aug_type_cond action_embed final { - $1->factorWithAug->conditions.append( ConditionTest( $2->loc, - $2->augType, $3->action, true ) ); - $$->factorWithAug = $1->factorWithAug; - }; -factor_with_aug: - factor_with_aug aug_type_cond '!' action_embed final { - $1->factorWithAug->conditions.append( ConditionTest( $2->loc, - $2->augType, $4->action, false ) ); - $$->factorWithAug = $1->factorWithAug; - }; -factor_with_aug: - factor_with_aug aug_type_to_state action_embed final { - /* Append the action, pass it up. */ - $1->factorWithAug->actions.append( ParserAction( $2->loc, - $2->augType, 0, $3->action ) ); - $$->factorWithAug = $1->factorWithAug; - }; -factor_with_aug: - factor_with_aug aug_type_from_state action_embed final { - /* Append the action, pass it up. */ - $1->factorWithAug->actions.append( ParserAction( $2->loc, - $2->augType, 0, $3->action ) ); - $$->factorWithAug = $1->factorWithAug; - }; -factor_with_aug: - factor_with_aug aug_type_eof action_embed final { - /* Append the action, pass it up. */ - $1->factorWithAug->actions.append( ParserAction( $2->loc, - $2->augType, 0, $3->action ) ); - $$->factorWithAug = $1->factorWithAug; - }; -factor_with_aug: - factor_with_aug aug_type_gbl_error action_embed final { - /* Append the action to the factorWithAug, record the refernce from - * factorWithAug to the action and pass up the factorWithAug. 
*/ - $1->factorWithAug->actions.append( ParserAction( $2->loc, - $2->augType, pd->curDefLocalErrKey, $3->action ) ); - $$->factorWithAug = $1->factorWithAug; - }; -factor_with_aug: - factor_with_aug aug_type_local_error action_embed final { - /* Append the action to the factorWithAug, record the refernce from - * factorWithAug to the action and pass up the factorWithAug. */ - $1->factorWithAug->actions.append( ParserAction( $2->loc, - $2->augType, pd->curDefLocalErrKey, $3->action ) ); - $$->factorWithAug = $1->factorWithAug; - }; -factor_with_aug: - factor_with_aug aug_type_local_error '(' local_err_name ',' action_embed ')' final { - /* Append the action to the factorWithAug, record the refernce from - * factorWithAug to the action and pass up the factorWithAug. */ - $1->factorWithAug->actions.append( ParserAction( $2->loc, - $2->augType, $4->error_name, $6->action ) ); - $$->factorWithAug = $1->factorWithAug; - }; -factor_with_aug: - factor_with_rep final { - $$->factorWithAug = new FactorWithAug( $1->factorWithRep ); - }; - -type aug_type -{ - ParserLoc loc; - AugType augType; -}; - -# Classes of transtions on which to embed actions or change priorities. -nonterm aug_type_base uses aug_type; - -aug_type_base: '@' final { $$->loc = $1->loc; $$->augType = at_finish; }; -aug_type_base: '%' final { $$->loc = $1->loc; $$->augType = at_leave; }; -aug_type_base: '$' final { $$->loc = $1->loc; $$->augType = at_all; }; -aug_type_base: '>' final { $$->loc = $1->loc; $$->augType = at_start; }; - -# Embedding conditions. 
-nonterm aug_type_cond uses aug_type; - -aug_type_cond: TK_StartCond final { $$->loc = $1->loc; $$->augType = at_start; }; -aug_type_cond: '>' KW_When final { $$->loc = $1->loc; $$->augType = at_start; }; -aug_type_cond: TK_AllCond final { $$->loc = $1->loc; $$->augType = at_all; }; -aug_type_cond: '$' KW_When final { $$->loc = $1->loc; $$->augType = at_all; }; -aug_type_cond: TK_LeavingCond final { $$->loc = $1->loc; $$->augType = at_leave; }; -aug_type_cond: '%' KW_When final { $$->loc = $1->loc; $$->augType = at_leave; }; -aug_type_cond: KW_When final { $$->loc = $1->loc; $$->augType = at_all; }; -aug_type_cond: KW_InWhen final { $$->loc = $1->loc; $$->augType = at_start; }; -aug_type_cond: KW_OutWhen final { $$->loc = $1->loc; $$->augType = at_leave; }; - -# -# To state actions. -# - -nonterm aug_type_to_state uses aug_type; - -aug_type_to_state: TK_StartToState - final { $$->loc = $1->loc; $$->augType = at_start_to_state; }; -aug_type_to_state: '>' KW_To - final { $$->loc = $1->loc; $$->augType = at_start_to_state; }; - -aug_type_to_state: TK_NotStartToState - final { $$->loc = $1->loc; $$->augType = at_not_start_to_state; }; -aug_type_to_state: '<' KW_To - final { $$->loc = $1->loc; $$->augType = at_not_start_to_state; }; - -aug_type_to_state: TK_AllToState - final { $$->loc = $1->loc; $$->augType = at_all_to_state; }; -aug_type_to_state: '$' KW_To - final { $$->loc = $1->loc; $$->augType = at_all_to_state; }; - -aug_type_to_state: TK_FinalToState - final { $$->loc = $1->loc; $$->augType = at_final_to_state; }; -aug_type_to_state: '%' KW_To - final { $$->loc = $1->loc; $$->augType = at_final_to_state; }; - -aug_type_to_state: TK_NotFinalToState - final { $$->loc = $1->loc; $$->augType = at_not_final_to_state; }; -aug_type_to_state: '@' KW_To - final { $$->loc = $1->loc; $$->augType = at_not_final_to_state; }; - -aug_type_to_state: TK_MiddleToState - final { $$->loc = $1->loc; $$->augType = at_middle_to_state; }; -aug_type_to_state: TK_Middle KW_To - final { 
$$->loc = $1->loc; $$->augType = at_middle_to_state; }; - -# -# From state actions. -# - -nonterm aug_type_from_state uses aug_type; - -aug_type_from_state: TK_StartFromState - final { $$->loc = $1->loc; $$->augType = at_start_from_state; }; -aug_type_from_state: '>' KW_From - final { $$->loc = $1->loc; $$->augType = at_start_from_state; }; - -aug_type_from_state: TK_NotStartFromState - final { $$->loc = $1->loc; $$->augType = at_not_start_from_state; }; -aug_type_from_state: '<' KW_From - final { $$->loc = $1->loc; $$->augType = at_not_start_from_state; }; - -aug_type_from_state: TK_AllFromState - final { $$->loc = $1->loc; $$->augType = at_all_from_state; }; -aug_type_from_state: '$' KW_From - final { $$->loc = $1->loc; $$->augType = at_all_from_state; }; - -aug_type_from_state: TK_FinalFromState - final { $$->loc = $1->loc; $$->augType = at_final_from_state; }; -aug_type_from_state: '%' KW_From - final { $$->loc = $1->loc; $$->augType = at_final_from_state; }; - -aug_type_from_state: TK_NotFinalFromState - final { $$->loc = $1->loc; $$->augType = at_not_final_from_state; }; -aug_type_from_state: '@' KW_From - final { $$->loc = $1->loc; $$->augType = at_not_final_from_state; }; - -aug_type_from_state: TK_MiddleFromState - final { $$->loc = $1->loc; $$->augType = at_middle_from_state; }; -aug_type_from_state: TK_Middle KW_From - final { $$->loc = $1->loc; $$->augType = at_middle_from_state; }; - -# -# Eof state actions. 
-# - -nonterm aug_type_eof uses aug_type; - -aug_type_eof: TK_StartEOF - final { $$->loc = $1->loc; $$->augType = at_start_eof; }; -aug_type_eof: '>' KW_Eof - final { $$->loc = $1->loc; $$->augType = at_start_eof; }; - -aug_type_eof: TK_NotStartEOF - final { $$->loc = $1->loc; $$->augType = at_not_start_eof; }; -aug_type_eof: '<' KW_Eof - final { $$->loc = $1->loc; $$->augType = at_not_start_eof; }; - -aug_type_eof: TK_AllEOF - final { $$->loc = $1->loc; $$->augType = at_all_eof; }; -aug_type_eof: '$' KW_Eof - final { $$->loc = $1->loc; $$->augType = at_all_eof; }; - -aug_type_eof: TK_FinalEOF - final { $$->loc = $1->loc; $$->augType = at_final_eof; }; -aug_type_eof: '%' KW_Eof - final { $$->loc = $1->loc; $$->augType = at_final_eof; }; - -aug_type_eof: TK_NotFinalEOF - final { $$->loc = $1->loc; $$->augType = at_not_final_eof; }; -aug_type_eof: '@' KW_Eof - final { $$->loc = $1->loc; $$->augType = at_not_final_eof; }; - -aug_type_eof: TK_MiddleEOF - final { $$->loc = $1->loc; $$->augType = at_middle_eof; }; -aug_type_eof: TK_Middle KW_Eof - final { $$->loc = $1->loc; $$->augType = at_middle_eof; }; - -# -# Global error actions. 
-# - -nonterm aug_type_gbl_error uses aug_type; - -aug_type_gbl_error: TK_StartGblError - final { $$->loc = $1->loc; $$->augType = at_start_gbl_error; }; -aug_type_gbl_error: '>' KW_Err - final { $$->loc = $1->loc; $$->augType = at_start_gbl_error; }; - -aug_type_gbl_error: TK_NotStartGblError - final { $$->loc = $1->loc; $$->augType = at_not_start_gbl_error; }; -aug_type_gbl_error: '<' KW_Err - final { $$->loc = $1->loc; $$->augType = at_not_start_gbl_error; }; - -aug_type_gbl_error: TK_AllGblError - final { $$->loc = $1->loc; $$->augType = at_all_gbl_error; }; -aug_type_gbl_error: '$' KW_Err - final { $$->loc = $1->loc; $$->augType = at_all_gbl_error; }; - -aug_type_gbl_error: TK_FinalGblError - final { $$->loc = $1->loc; $$->augType = at_final_gbl_error; }; -aug_type_gbl_error: '%' KW_Err - final { $$->loc = $1->loc; $$->augType = at_final_gbl_error; }; - -aug_type_gbl_error: TK_NotFinalGblError - final { $$->loc = $1->loc; $$->augType = at_not_final_gbl_error; }; -aug_type_gbl_error: '@' KW_Err - final { $$->loc = $1->loc; $$->augType = at_not_final_gbl_error; }; - -aug_type_gbl_error: TK_MiddleGblError - final { $$->loc = $1->loc; $$->augType = at_middle_gbl_error; }; -aug_type_gbl_error: TK_Middle KW_Err - final { $$->loc = $1->loc; $$->augType = at_middle_gbl_error; }; - - -# -# Local error actions. 
-# - -nonterm aug_type_local_error uses aug_type; - -aug_type_local_error: TK_StartLocalError - final { $$->loc = $1->loc; $$->augType = at_start_local_error; }; -aug_type_local_error: '>' KW_Lerr - final { $$->loc = $1->loc; $$->augType = at_start_local_error; }; - -aug_type_local_error: TK_NotStartLocalError - final { $$->loc = $1->loc; $$->augType = at_not_start_local_error; }; -aug_type_local_error: '<' KW_Lerr - final { $$->loc = $1->loc; $$->augType = at_not_start_local_error; }; - -aug_type_local_error: TK_AllLocalError - final { $$->loc = $1->loc; $$->augType = at_all_local_error; }; -aug_type_local_error: '$' KW_Lerr - final { $$->loc = $1->loc; $$->augType = at_all_local_error; }; - -aug_type_local_error: TK_FinalLocalError - final { $$->loc = $1->loc; $$->augType = at_final_local_error; }; -aug_type_local_error: '%' KW_Lerr - final { $$->loc = $1->loc; $$->augType = at_final_local_error; }; - -aug_type_local_error: TK_NotFinalLocalError - final { $$->loc = $1->loc; $$->augType = at_not_final_local_error; }; -aug_type_local_error: '@' KW_Lerr - final { $$->loc = $1->loc; $$->augType = at_not_final_local_error; }; - -aug_type_local_error: TK_MiddleLocalError - final { $$->loc = $1->loc; $$->augType = at_middle_local_error; }; -aug_type_local_error: TK_Middle KW_Lerr - final { $$->loc = $1->loc; $$->augType = at_middle_local_error; }; - - -type action_ref -{ - Action *action; -}; - -# Different ways to embed actions. A TK_Word is reference to an action given by -# the user as a statement in the fsm specification. An action can also be -# specified immediately. 
-nonterm action_embed uses action_ref; - -action_embed: named_action_ref final { $$->action = $1->action; }; -action_embed: '(' named_action_ref ')' final { $$->action = $2->action; }; -action_embed: action_embed_block final { $$->action = $1->action; }; - -nonterm action_arg_list -{ - ActionArgList *argList; -}; - -action_arg_list: - action_arg_list ',' action_embed - final { - $$->argList = $1->argList; - $$->argList->append( $3->action ); - }; -action_arg_list: - action_embed - final { - $$->argList = new ActionArgList; - $$->argList->append( $1->action ); - }; - -nonterm opt_action_arg_list uses action_arg_list; - -opt_action_arg_list: - action_arg_list - final - { - $$->argList = $1->argList; - }; -opt_action_arg_list: - final { - $$->argList = new ActionArgList; - }; - -nonterm named_action_ref uses action_ref; - -named_action_ref: - TK_Word - try { - /* Set the name in the actionDict. */ - Action *action = pd->actionDict.find( $1->data ); - if ( action != 0 ) { - /* Pass up the action element */ - $$->action = action; - if ( action->paramList != 0 ) - reject(); - } - else { - /* Will recover by returning null as the action. */ - pd->id->error($1->loc) << "action lookup of \"" << $1->data << "\" failed" << endl; - $$->action = 0; - } - }; -named_action_ref: - TK_Word '(' opt_action_arg_list ')' - try { - /* Set the name in the actionDict. */ - Action *action = pd->actionDict.find( $1->data ); - if ( action != 0 ) { - - if ( action->paramList == 0 ) - reject(); - - /* - * Store the action we resolved. In the final action we will - * convert this to the specialized action. Can't do this here since - * it is a try action and we have not processed the args list (all - * done by final actions ). - */ - $$->action = action; - } - else { - /* Will recover by returning null as the action. */ - pd->id->error($1->loc) << "action lookup of \"" << $1->data << "\" failed" << endl; - $$->action = 0; - } - } - final { - /* Make sure the number of actions line up. 
*/ - if ( $3->argList->length() != $$->action->paramList->length() ) { - pd->id->error($1->loc) << "wrong number of action " - "arguments for \"" << $1->data << "\"" << endl; - } - - /* Now we need to specialize using the supplied args. We can only - * present an Action* to fsmcodegen. */ - ActionArgListMapEl *el = $$->action->argListMap->find( $3->argList ); - if ( el == 0 ) { - /* Allocate an action representing this specialization. */ - Action *specAction = Action::cons( $1->loc, $$->action, - $3->argList, pd->fsmCtx->nextCondId++ ); - pd->fsmCtx->actionList.append( specAction ); - - el = $$->action->argListMap->insert( $3->argList, specAction ); - } - else { - /* Can delete $3->arg list. */ - delete $3->argList; - } - - $$->action = el->value; - }; - -nonterm action_embed_block uses action_ref; - -action_embed_block: - '{' inline_block '}' final { - /* Create the action, add it to the list and pass up. */ - Action *newAction = new Action( $1->loc, std::string(), - $2->inlineList, pd->fsmCtx->nextCondId++ ); - pd->fsmCtx->actionList.append( newAction ); - $$->action = newAction; - }; - -nonterm priority_name -{ - int priorityName; -}; - -# A specified priority name. Looks up the name in the current priority -# dictionary. -priority_name: - TK_Word final { - // Lookup/create the priority key. - PriorDictEl *priorDictEl; - if ( pd->priorDict.insert( $1->data, pd->fsmCtx->nextPriorKey, &priorDictEl ) ) - pd->fsmCtx->nextPriorKey += 1; - - // Use the inserted/found priority key. - $$->priorityName = priorDictEl->value; - }; - -nonterm priority_aug -{ - int priorityNum; -}; - -# Priority change specs. -priority_aug: - priority_aug_num final { - char *data = $1->token.data; - if ( $1->pos || $1->neg ) { - data = new char[$1->token.length + 2]; - data[0] = $1->pos ? '+' : '-'; - memcpy( data + 1, $1->token.data, $1->token.length ); - data[$1->token.length + 1] = 0; - } - - // Convert the priority number to a long. Check for overflow. 
- errno = 0; - long aug = strtol( data, 0, 10 ); - if ( errno == ERANGE && aug == LONG_MAX ) { - /* Priority number too large. Recover by setting the priority to 0. */ - pd->id->error($1->token.loc) << "priority number " << data << - " overflows" << endl; - $$->priorityNum = 0; - } - else if ( errno == ERANGE && aug == LONG_MIN ) { - /* Priority number too large in the neg. Recover by using 0. */ - pd->id->error($1->token.loc) << "priority number " << data << - " underflows" << endl; - $$->priorityNum = 0; - } - else { - /* No overflow or underflow. */ - $$->priorityNum = aug; - } - - if ( $1->pos || $1->neg ) - delete[] data; - }; - -nonterm priority_aug_num -{ - bool neg; - bool pos; - Token token; -}; - - -priority_aug_num: - TK_UInt final { - $$->pos = false; - $$->neg = false; - $$->token = *$1; - }; -priority_aug_num: - '+' TK_UInt final { - $$->pos = true; - $$->neg = false; - $$->token.set( $2->data, $2->length, $1->loc ); - }; -priority_aug_num: - '-' TK_UInt final { - $$->pos = false; - $$->neg = true; - $$->token.set( $2->data, $2->length, $1->loc ); - }; - -nonterm local_err_name -{ - int error_name; -}; - -local_err_name: - TK_Word final { - /* Lookup/create the priority key. */ - LocalErrDictEl *localErrDictEl; - if ( pd->localErrDict.insert( $1->data, pd->nextLocalErrKey, &localErrDictEl ) ) - pd->nextLocalErrKey += 1; - - /* Use the inserted/found priority key. */ - $$->error_name = localErrDictEl->value; - }; - - - -# The fourth level of precedence. These are the trailing unary operators that -# allow for repetition. - -nonterm factor_with_rep -{ - FactorWithRep *factorWithRep; -}; - -factor_with_rep: - factor_with_rep '*' final { - $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, - 0, 0, FactorWithRep::StarType ); - }; -factor_with_rep: - factor_with_rep TK_StarStar final { - $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, - 0, 0, FactorWithRep::StarStarType ); - }; -factor_with_rep: - factor_with_rep '?' 
final { - $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, - 0, 0, FactorWithRep::OptionalType ); - }; -factor_with_rep: - factor_with_rep '+' final { - $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, - 0, 0, FactorWithRep::PlusType ); - }; -factor_with_rep: - factor_with_rep '{' factor_rep_num '}' final { - $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, - $3->rep, 0, FactorWithRep::ExactType ); - }; -factor_with_rep: - factor_with_rep '{' ',' factor_rep_num '}' final { - $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, - 0, $4->rep, FactorWithRep::MaxType ); - }; -factor_with_rep: - factor_with_rep '{' factor_rep_num ',' '}' final { - $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, - $3->rep, 0, FactorWithRep::MinType ); - }; -factor_with_rep: - factor_with_rep '{' factor_rep_num ',' factor_rep_num '}' final { - $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, - $3->rep, $5->rep, FactorWithRep::RangeType ); - }; -factor_with_rep: - factor_with_neg final { - $$->factorWithRep = new FactorWithRep( $1->factorWithNeg ); - }; - -nonterm factor_rep_num -{ - int rep; -}; - -factor_rep_num: - TK_UInt final { - // Convert the priority number to a long. Check for overflow. - errno = 0; - long rep = strtol( $1->data, 0, 10 ); - if ( errno == ERANGE && rep == LONG_MAX ) { - // Repetition too large. Recover by returing repetition 1. */ - pd->id->error($1->loc) << "repetition number " << $1->data << " overflows" << endl; - $$->rep = 1; - } - else { - // Cannot be negative, so no overflow. - $$->rep = rep; - } - }; - - -# -# The fifth level up in precedence. Negation. -# - -nonterm factor_with_neg -{ - FactorWithNeg *factorWithNeg; -}; - -factor_with_neg: - '!' 
factor_with_neg final { - $$->factorWithNeg = new FactorWithNeg( $1->loc, - $2->factorWithNeg, FactorWithNeg::NegateType ); - }; -factor_with_neg: - '^' factor_with_neg final { - $$->factorWithNeg = new FactorWithNeg( $1->loc, - $2->factorWithNeg, FactorWithNeg::CharNegateType ); - }; -factor_with_neg: - factor final { - $$->factorWithNeg = new FactorWithNeg( $1->factor ); - }; - -nonterm factor -{ - Factor *factor; -}; - -factor: - TK_Literal final { - /* Create a new factor node going to a concat literal. */ - $$->factor = new Factor( new Literal( $1->loc, false, $1->data, - $1->length, Literal::LitString ) ); - }; -factor: - alphabet_num final { - /* Create a new factor node going to a literal number. */ - $$->factor = new Factor( new Literal( $1->token.loc, $1->neg, - $1->token.data, $1->token.length, Literal::Number ) ); - }; -factor: - TK_Word final { - /* Find the named graph. */ - GraphDictEl *gdNode = pd->graphDict.find( $1->data ); - if ( gdNode == 0 ) { - /* Recover by returning null as the factor node. */ - pd->id->error($1->loc) << "graph lookup of \"" << $1->data << "\" failed" << endl; - $$->factor = 0; - } - else if ( gdNode->isInstance ) { - /* Recover by retuning null as the factor node. */ - pd->id->error($1->loc) << "references to graph instantiations not allowed " - "in expressions" << endl; - $$->factor = 0; - } - else { - /* Create a factor node that is a lookup of an expression. */ - $$->factor = new Factor( $1->loc, gdNode->value ); - } - }; -factor: - RE_SqOpen regular_expr_or_data RE_SqClose final { - /* Create a new factor node going to an OR expression. */ - $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::OrBlock ) ); - }; -factor: - RE_SqOpenNeg regular_expr_or_data RE_SqClose final { - /* Create a new factor node going to a negated OR expression. 
*/ - $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::NegOrBlock ) ); - }; -factor: - RE_Slash regular_expr RE_Slash final { - bool caseInsensitive = false; - checkLitOptions( pd->id, $3->loc, $3->data, $3->length, caseInsensitive ); - if ( caseInsensitive ) - $2->regExpr->caseInsensitive = true; - - /* Create a new factor node going to a regular exp. */ - $$->factor = new Factor( $2->regExpr ); - }; -factor: - range_lit TK_DotDot range_lit final { - /* Create a new factor node going to a range. */ - $$->factor = new Factor( new Range( $1->literal, $3->literal, false ) ); - }; -factor: - range_lit TK_DotDotIndep range_lit final { - /* Create a new factor node going to a range. */ - $$->factor = new Factor( new Range( $1->literal, $3->literal, true ) ); - }; -factor: - TK_ColonNfaOpen expression ',' action_embed ',' - action_embed ',' action_embed ',' action_embed ',' action_embed ',' - action_embed TK_CloseColon - final { - /* push, pop, ini, stay, repeat, exit */ - $$->factor = new Factor( $1->loc, pd->nextRepId++, $2->expression, - $4->action, $6->action, $8->action, $10->action, $12->action, $14->action, - Factor::NfaRep ); - }; - - -nonterm colon_cond -{ - Factor::Type type; - ParserLoc loc; -}; - -colon_cond: - TK_ColonCondOpen - final { - $$->type = Factor::CondStar; - $$->loc = $1->loc; - }; -colon_cond: - TK_ColonCondStarOpen - final { - $$->type = Factor::CondStar; - $$->loc = $1->loc; - }; -colon_cond: - TK_ColonCondPlusOpen - final { - $$->type = Factor::CondPlus; - $$->loc = $1->loc; - }; - -nonterm opt_max_arg -{ - Action *action; -}; - -opt_max_arg: - ',' action_embed - final - { - $$->action = $2->action; - - }; -opt_max_arg: - final - { - $$->action = 0; - }; - -factor: - colon_cond expression ',' action_embed ',' - action_embed ',' action_embed opt_max_arg TK_CloseColon - final { - /* ini, inc, min, max */ - $$->factor = new Factor( $1->loc, pd->nextRepId++, $2->expression, - $4->action, $6->action, $8->action, $9->action, 0, 0, 
- $1->type ); - }; -factor: - '(' join ')' final { - /* Create a new factor going to a parenthesized join. */ - $$->factor = new Factor( $2->join ); - $2->join->loc = $1->loc; - }; - -nonterm range_lit -{ - Literal *literal; -}; - -# Literals which can be the end points of ranges. -range_lit: - TK_Literal final { - /* Range literas must have only one char. We restrict this in the parse tree. */ - $$->literal = new Literal( $1->loc, false, $1->data, - $1->length, Literal::LitString ); - }; -range_lit: - alphabet_num final { - /* Create a new literal number. */ - $$->literal = new Literal( $1->token.loc, $1->neg, - $1->token.data, $1->token.length, Literal::Number ); - }; - -nonterm alphabet_num -{ - bool neg; - Token token; -}; - -# Any form of a number that can be used as a basic machine. */ -alphabet_num: - TK_UInt final { - $$->neg = false; - $$->token = *$1; - }; -alphabet_num: - '-' TK_UInt final { - $$->neg = true; - $$->token.set( $2->data, $2->length, $1->loc ); - }; -alphabet_num: - TK_Hex final { - $$->neg = false; - $$->token = *$1; - }; -# -# Regular Expressions. -# - -nonterm regular_expr -{ - RegExpr *regExpr; -}; - -# Parser for regular expression fsms. Any number of expression items which -# generally gives a machine one character long or one character long stared. -regular_expr: - regular_expr regular_expr_item final { - /* An optimization to lessen the tree size. If a non-starred char is - * directly under the left side on the right and the right side is - * another non-starred char then paste them together and return the - * left side. Otherwise just put the two under a new reg exp node. */ - if ( $2->reItem->type == ReItem::Data && !$2->reItem->star && - $1->regExpr->type == RegExpr::RecurseItem && - $1->regExpr->item->type == ReItem::Data && !$1->regExpr->item->star ) - { - /* Append the right side to the right side of the left and toss the - * right side. 
*/ - $1->regExpr->item->data.append( $2->reItem->data ); - delete $2->reItem; - $$->regExpr = $1->regExpr; - } - else { - $$->regExpr = new RegExpr( $1->regExpr, $2->reItem ); - } - }; -regular_expr: - final { - /* Can't optimize the tree. */ - $$->regExpr = new RegExpr(); - }; - -nonterm regular_expr_item -{ - ReItem *reItem; -}; - -# RegularExprItems can be a character spec with an optional staring of the char. -regular_expr_item: - regular_expr_char RE_Star final { - $1->reItem->star = true; - $$->reItem = $1->reItem; - }; -regular_expr_item: - regular_expr_char final { - $$->reItem = $1->reItem; - }; - -nonterm regular_expr_char -{ - ReItem *reItem; -}; - -# A character spec can be a set of characters inside of square parenthesis, a -# dot specifying any character or some explicitly stated character. -regular_expr_char: - RE_SqOpen regular_expr_or_data RE_SqClose final { - $$->reItem = new ReItem( $1->loc, $2->reOrBlock, ReItem::OrBlock ); - }; -regular_expr_char: - RE_SqOpenNeg regular_expr_or_data RE_SqClose final { - $$->reItem = new ReItem( $1->loc, $2->reOrBlock, ReItem::NegOrBlock ); - }; -regular_expr_char: - RE_Dot final { - $$->reItem = new ReItem( $1->loc, ReItem::Dot ); - }; -regular_expr_char: - RE_Char final { - $$->reItem = new ReItem( $1->loc, $1->data, $1->length ); - }; - -# The data inside of a [] expression in a regular expression. Accepts any -# number of characters or ranges. */ -nonterm regular_expr_or_data -{ - ReOrBlock *reOrBlock; -}; - -regular_expr_or_data: - regular_expr_or_data regular_expr_or_char final { - /* An optimization to lessen the tree size. If an or char is directly - * under the left side on the right and the right side is another or - * char then paste them together and return the left side. Otherwise - * just put the two under a new or data node. 
*/ - if ( $2->reOrItem->type == ReOrItem::Data && - $1->reOrBlock->type == ReOrBlock::RecurseItem && - $1->reOrBlock->item->type == ReOrItem::Data ) - { - /* Append the right side to right side of the left and toss the - * right side. */ - $1->reOrBlock->item->data.append( $2->reOrItem->data ); - delete $2->reOrItem; - $$->reOrBlock = $1->reOrBlock; - } - else { - /* Can't optimize, put the left and right under a new node. */ - $$->reOrBlock = new ReOrBlock( $1->reOrBlock, $2->reOrItem ); - } - }; -regular_expr_or_data: - final { - $$->reOrBlock = new ReOrBlock(); - }; - -# A single character inside of an or expression. Can either be a character or a -# set of characters. -nonterm regular_expr_or_char -{ - ReOrItem *reOrItem; -}; - -regular_expr_or_char: - RE_Char final { - $$->reOrItem = new ReOrItem( $1->loc, $1->data, $1->length ); - }; -regular_expr_or_char: - RE_Char RE_Dash RE_Char final { - $$->reOrItem = new ReOrItem( $2->loc, $1->data[0], $3->data[0] ); - }; - -# -# Inline Lists for inline host code. -# - -type inline_list -{ - InlineList *inlineList; -}; - -nonterm inline_block uses inline_list; - -inline_block: - inline_block inline_block_item - final { - /* Append the item to the list, return the list. */ - $$->inlineList = $1->inlineList; - $$->inlineList->append( $2->inlineItem ); - }; - -inline_block: - final { - /* Start with empty list. */ - $$->inlineList = new InlineList; - }; - -type inline_item -{ - InlineItem *inlineItem; -}; - -nonterm inline_block_item uses inline_item; -nonterm inline_block_interpret uses inline_item; - -inline_block_item: - inline_expr_any - final { - $$->inlineItem = new InlineItem( $1->token.loc, $1->token.data, InlineItem::Text ); - }; - -inline_block_item: - inline_block_symbol - final { - $$->inlineItem = new InlineItem( $1->token.loc, $1->token.data, InlineItem::Text ); - }; - -inline_block_item: - inline_block_interpret - final { - /* Pass the inline item up. 
*/ - $$->inlineItem = $1->inlineItem; - }; - -nonterm inline_block_symbol uses token_type; - -inline_block_symbol: ',' final { $$->token = *$1; }; -inline_block_symbol: ';' final { $$->token = *$1; }; -inline_block_symbol: '(' final { $$->token = *$1; }; -inline_block_symbol: ')' final { $$->token = *$1; }; -inline_block_symbol: '*' final { $$->token = *$1; }; -inline_block_symbol: TK_NameSep final { $$->token = *$1; }; - -# Interpreted statements in a struct block. */ -inline_block_interpret: - inline_expr_interpret final { - /* Pass up interpreted items of inline expressions. */ - $$->inlineItem = $1->inlineItem; - }; -inline_block_interpret: - KW_Hold ';' final { - $$->inlineItem = new InlineItem( $1->loc, InlineItem::Hold ); - }; -inline_block_interpret: - KW_Exec inline_expr ';' final { - $$->inlineItem = new InlineItem( $1->loc, InlineItem::Exec ); - $$->inlineItem->children = $2->inlineList; - }; -inline_block_interpret: - KW_Goto state_ref ';' final { - $$->inlineItem = new InlineItem( $1->loc, - new NameRef(nameRef), InlineItem::Goto ); - }; -inline_block_interpret: - KW_Goto '*' inline_expr ';' final { - $$->inlineItem = new InlineItem( $1->loc, InlineItem::GotoExpr ); - $$->inlineItem->children = $3->inlineList; - }; -inline_block_interpret: - KW_Next state_ref ';' final { - $$->inlineItem = new InlineItem( $1->loc, new NameRef(nameRef), InlineItem::Next ); - }; -inline_block_interpret: - KW_Next '*' inline_expr ';' final { - $$->inlineItem = new InlineItem( $1->loc, InlineItem::NextExpr ); - $$->inlineItem->children = $3->inlineList; - }; -inline_block_interpret: - KW_Call state_ref ';' final { - $$->inlineItem = new InlineItem( $1->loc, new NameRef(nameRef), InlineItem::Call ); - }; -inline_block_interpret: - KW_Call '*' inline_expr ';' final { - $$->inlineItem = new InlineItem( $1->loc, InlineItem::CallExpr ); - $$->inlineItem->children = $3->inlineList; - }; -inline_block_interpret: - KW_Ret ';' final { - $$->inlineItem = new InlineItem( $1->loc, 
InlineItem::Ret ); - }; -inline_block_interpret: - KW_Break ';' final { - $$->inlineItem = new InlineItem( $1->loc, InlineItem::Break ); - }; -inline_block_interpret: - KW_Ncall state_ref ';' final { - $$->inlineItem = new InlineItem( $1->loc, new NameRef(nameRef), - InlineItem::Ncall ); - }; -inline_block_interpret: - KW_Ncall '*' inline_expr ';' final { - $$->inlineItem = new InlineItem( $1->loc, InlineItem::NcallExpr ); - $$->inlineItem->children = $3->inlineList; - }; -inline_block_interpret: - KW_Nret ';' final { - $$->inlineItem = new InlineItem( $1->loc, InlineItem::Nret ); - }; -inline_block_interpret: - KW_Nbreak ';' final { - $$->inlineItem = new InlineItem( $1->loc, InlineItem::Nbreak ); - }; -inline_block_interpret: - TK_SubstRef final { - $$->inlineItem = new InlineItem( $1->loc, InlineItem::Subst ); - - ActionParamList::Iter api = *paramList; - for ( ; api.lte(); api++ ) { - if ( (*api)->name == $1->data ) - break; - } - - if ( api.end() ) - pd->id->error( $1->loc ) << "invalid parameter reference \"$" << $1->data << "\"" << endl; - else { - $$->inlineItem->substPos = api.pos(); - } - }; - - -nonterm inline_expr uses inline_list; - -inline_expr: - inline_expr inline_expr_item - final { - $$->inlineList = $1->inlineList; - $$->inlineList->append( $2->inlineItem ); - }; -inline_expr: - final { - /* Init the list used for this expr. */ - $$->inlineList = new InlineList; - }; - -nonterm inline_expr_item uses inline_item; - -inline_expr_item: - inline_expr_any - final { - /* Return a text segment. */ - $$->inlineItem = new InlineItem( $1->token.loc, $1->token.data, InlineItem::Text ); - }; -inline_expr_item: - inline_expr_symbol - final { - /* Return a text segment, must heap alloc the text. */ - $$->inlineItem = new InlineItem( $1->token.loc, $1->token.data, InlineItem::Text ); - }; -inline_expr_item: - inline_expr_interpret - final{ - /* Pass the inline item up. 
*/ - $$->inlineItem = $1->inlineItem; - }; - -nonterm inline_expr_any uses token_type; - -inline_expr_any: IL_WhiteSpace try { $$->token = *$1; }; -inline_expr_any: IL_Comment try { $$->token = *$1; }; -inline_expr_any: IL_Literal try { $$->token = *$1; }; -inline_expr_any: IL_Symbol try { $$->token = *$1; }; -inline_expr_any: TK_UInt try { $$->token = *$1; }; -inline_expr_any: TK_Hex try { $$->token = *$1; }; -inline_expr_any: TK_Word try { $$->token = *$1; }; - -# Anything in a ExecValExpr that is not dynamically allocated. This includes -# all special symbols caught in inline code except the semi. - -nonterm inline_expr_symbol uses token_type; - -inline_expr_symbol: ',' try { $$->token = *$1; }; -inline_expr_symbol: '(' try { $$->token = *$1; }; -inline_expr_symbol: ')' try { $$->token = *$1; }; -inline_expr_symbol: '*' try { $$->token = *$1; }; -inline_expr_symbol: TK_NameSep try { $$->token = *$1; }; - -nonterm inline_expr_interpret uses inline_item; - -inline_expr_interpret: - KW_PChar - final { - $$->inlineItem = new InlineItem( $1->loc, InlineItem::PChar ); - }; -inline_expr_interpret: - KW_Char - final { - $$->inlineItem = new InlineItem( $1->loc, InlineItem::Char ); - }; -inline_expr_interpret: - KW_CurState - final { - $$->inlineItem = new InlineItem( $1->loc, InlineItem::Curs ); - }; -inline_expr_interpret: - KW_TargState - final { - $$->inlineItem = new InlineItem( $1->loc, InlineItem::Targs ); - }; -inline_expr_interpret: - KW_Entry '(' state_ref ')' - final { - $$->inlineItem = new InlineItem( $1->loc, - new NameRef(nameRef), InlineItem::Entry ); - }; - -# A local state reference. Cannot have :: prefix. -local_state_ref: - no_name_sep state_ref_names; - -# Clear the name ref structure. -no_name_sep: - final { - nameRef.empty(); - }; - -# A qualified state reference. -state_ref: opt_name_sep state_ref_names; - -# Optional leading name separator. 
-opt_name_sep: - TK_NameSep - final { - /* Insert an initial null pointer val to indicate the existence of the - * initial name seperator. */ - nameRef.setAs( 0 ); - }; -opt_name_sep: - final { - nameRef.empty(); - }; - -# List of names separated by :: -state_ref_names: - state_ref_names TK_NameSep TK_Word - final { - nameRef.append( $3->data ); - }; -state_ref_names: - TK_Word - final { - nameRef.append( $1->data ); - }; - -}%% - -%%{ - write types; - write data; -}%% - -void Parser6::init() -{ - %% write init; -} - -int Parser6::parseLangEl( int type, const Token *token ) -{ - %% write exec; - return errCount == 0 ? 0 : -1; -} - -void Parser6::clear() -{ - while ( block != 0 ) { - Parser6_Block *next = block->next; - free( block ); - block = next; - } - - clearTokdata( this ); -} - -void Parser6::tryMachineDef( const InputLoc &loc, char *name, - MachineDef *machineDef, bool isInstance ) -{ - GraphDictEl *newEl = pd->graphDict.insert( name ); - if ( newEl != 0 ) { - /* New element in the dict, all good. */ - newEl->value = new VarDef( name, machineDef ); - newEl->isInstance = isInstance; - newEl->loc = loc; - newEl->value->isExport = exportContext[exportContext.length()-1]; - - /* It it is an instance, put on the instance list. */ - if ( isInstance ) - pd->instanceList.append( newEl ); - } - else { - // Recover by ignoring the duplicate. 
- pd->id->error(loc) << "fsm \"" << name << "\" previously defined" << endl; - } -} - -ostream &Parser6::parse_error( int tokId, Token &token ) -{ - ostream &err = pd->id->error( token.loc ); - - err << "at token "; - if ( tokId < 128 ) - err << "\"" << Parser6_lelNames[tokId] << "\""; - else - err << Parser6_lelNames[tokId]; - if ( token.data != 0 ) - err << " with data \"" << token.data << "\""; - err << ": "; - - return err; -} - -int Parser6::token( InputLoc &loc, int tokId, char *tokstart, int toklen ) -{ - Token token; - token.data = tokstart; - token.length = toklen; - token.loc.fileName = loc.fileName; - token.loc.line = loc.line; - token.loc.col = loc.col; - int res = parseLangEl( tokId, &token ); - if ( res < 0 ) { - parse_error(tokId, token) << "parse error" << endl; - pd->id->abortCompile( 1 ); - } - return res; -} - -/* Send eof to all parsers. */ -void Parser6::terminateParser() -{ - /* FIXME: a proper token is needed here. Suppose we should use the - * location of EOF in the last file that the parser was referenced in. */ - InputLoc loc; - loc.fileName = ""; - loc.line = 0; - loc.col = 0; - - token( loc, Parser6_tk_eof, 0, 0 ); -} - - diff --git a/ragel/rlparse.lm b/ragel/rlparse.lm deleted file mode 100644 index 02a937c5..00000000 --- a/ragel/rlparse.lm +++ /dev/null @@ -1,207 +0,0 @@ -include 'ragel.lm' -include 'rlreduce.lm' - -namespace inline - lex - literal `fpc `fc `fcurs `ftargs - `fentry `fhold `fexec `fgoto `fnext - `fcall `fret `fbreak `fncall `fnret `fnbreak - - token ident /ident/ - token number /digit+/ - token hex_number /'0x' [0-9a-fA-F]+/ - token dec_number /'0x' [0-9a-fA-F]+/ - - token comment - / c_comment | cpp_comment / - - token string - / s_literal | d_literal / - - token whitespace - / ( [ \t] | NL )+ / - - literal - `{ `} `:: `* `, `( `) `; - - token var_ref - / "$" [a-zA-Z_][a-zA-Z_0-9]* / - { - if GblActionParams - { - input->push( make_token( - typeid, input->pull( match_length ) ) ) - } - else - { - # Just pull one char. 
Don't consume the word because it may - # be a keyword. - input->push( make_token( - typeid, input->pull( 1 ) ) ) - } - } - - token c_any - / any / - end - -end - - -namespace host - lex - literal `%%{ - - token close_inc /'}--%%'/ - { - input->push( make_token( typeid, input->pull( match_length ) ) ) - restoreGlobals() - } - - token close_imp /'}++%%'/ - { - input->push( make_token( typeid, input->pull( match_length ) ) ) - restoreGlobals() - } - - token slr / '%%' [^{] [^\n]* '\n' / - { - # Translates single line to multi-line - input->pull( 2 ) - R: str = input->pull( match_length - 3 ) - input->push( "}%%" ) - input->push( R ) - input->push( "%%{" ) - } - - rl NL / '\n' / - - rl s_literal - / "'" ([^'\\\n] | '\\' (any | NL))* "'" / - - rl d_literal - / '"' ([^"\\] | NL | '\\' (any | NL))* '"' / - - literal `define `= - - token ident /ident "'"?/ - token number /digit+/ - token hex_number /'0x' [0-9a-fA-F]+/ - - token comment - / c_comment | cpp_comment / - - token string - / s_literal | d_literal / - - token whitespace - / ( [ \t] | NL )+ / - - token c_any / any / - end - - def tok - [`define whitespace ident whitespace? number] :ImportDefNum - | [`define whitespace ident whitespace? string] :ImportDefStr - | [ident whitespace? `= whitespace? number] :ImportAssignNum - | [ident whitespace? `= whitespace? string] :ImportAssignStr - | [`define] :Def - | [`=] :Eq - | [ident] :Ident - | [number] :Number - | [hex_number] :HexNumber - | [comment] :Comment - | [string] :String - | [whitespace] :Whitespace - | [c_any] :Any -end - -reduction TopLevel - - # Pass Through. - # def tok - # [`define ident number] :Def1 - # | [`define ident string] :Def2 - # | [ident `= number] :Ass1 - # | [ident `= string] :Ass2 - # [`define whitespace ident whitespace? number] :ImportDefNum - # | [`define whitespace ident whitespace? string] :ImportDefStr - # | [ident whitespace? `= whitespace? number] :ImportAssignNum - # | [ident whitespace? `= whitespace? 
string] :ImportAssignStr - # | [`define] :Def - # | [`=] :Eq - # | [ident] :Ident - # | [number] :Number - # | [hex_number] :HexNumber - # | [comment] :Comment - # | [string] :String - # | [whitespace] :Whitespace - # | [c_any] :Any - - host::tok :ImportDefNum - { - if ( isImport ) - { - Literal *lit = new Literal( @number, - false /* $number->neg */, $number->data, - $number->length, Literal::Number ); - - string name( $ident->data, $ident->length ); - import( @ident, name, lit ); - } - } - - host::tok :ImportDefStr - { - if ( isImport ) - { - Literal *lit = new Literal( @string, false, - $string->data, $string->length, Literal::LitString ); - string name( $ident->data, $ident->length ); - import( @ident, name, lit ); - } - } - - host::tok :ImportAssignNum - { - if ( isImport ) - { - Literal *lit = new Literal( @number, - false /*$number->neg */, $number->data, - $number->length, Literal::Number ); - string name( $ident->data, $ident->length ); - import( @ident, name, lit ); - } - } - - host::tok :ImportAssignStr - { - if ( isImport ) - { - Literal *lit = new Literal( @string, false, - $string->data, $string->length, Literal::LitString ); - - string name( $ident->data, $ident->length ); - import( @ident, name, lit ); - } - } - -end - - -export RagelError: str - -# File name. The open is expected to succeed. It is tested before the colm -# program is called. -A: list_el = argv->pop_head_el() -GblFileName = A->value - -# Remaining items are include paths. 
-while ( argv->length > 0 ) { - A = argv->pop_head_el() - GblIncludePaths->push_tail_el( A ) -} - -Stream: stream = open( GblFileName, "r" ) -reduce TopLevel start[ Stream ] -RagelError = error diff --git a/ragel/rlreduce.lm b/ragel/rlreduce.lm deleted file mode 100644 index fe25cd38..00000000 --- a/ragel/rlreduce.lm +++ /dev/null @@ -1,2570 +0,0 @@ -reduction TopLevel - host::opt_bom :Bom - { - id->utf8BomPresent = true; - } - - # def machine_name - # [`machine word `;] :MachineName - ragel::machine_name :MachineName - { - string machine( $mn_word->data, $mn_word->length ); - - if ( includeDepth == 0 ) { - /* Maintain section dict, section list and the cur section pointer - * if we are in the top level. */ - SectionDictEl *sdEl = id->sectionDict.find( machine ); - if ( sdEl == 0 ) { - sdEl = new SectionDictEl( machine ); - sdEl->value = new Section( machine ); - id->sectionDict.insert( sdEl ); - id->sectionList.append( sdEl->value ); - } - - section = sdEl->value; - - ParseDataDictEl *pdEl = id->parseDataDict.find( machine ); - if ( pdEl == 0 ) { - InputLoc sectionLoc = @1; - pdEl = new ParseDataDictEl( machine ); - pdEl->value = new ParseData( id, machine, - id->nextMachineId++, sectionLoc, hostLang, - minimizeLevel, minimizeOpt ); - id->parseDataDict.insert( pdEl ); - id->parseDataList.append( pdEl->value ); - } - - pd = pdEl->value; - } - } - - # def statement - # [assignment] :Assignment - # | [instantiation] :Instantiation - # | [nfa_union] :NfaUnion - # | [action_spec] :ActionSpec - # | [`prepush action_block] :PrePush commit - # | [`postpop action_block] :PostPop commit - # | [`nfaprepush action_block] :NfaPrePush commit - # | [`nfapostpop action_block] :NfaPostPop commit - # | [`variable variable_name inline_expr_reparse] :Variable commit - # | [`alphtype alphtype_type `;] :AlphType commit - # | [`access inline_expr_reparse] :Access commit - # | [`write Cmd: word ArgList: write_arg* `;] :Write commit - # | [`getkey inline_expr_reparse] :GetKey commit - # | 
[`import string `;] :Import commit - # | [`include include_spec `;] :Include commit - - # def assignment - # [opt_export def_name `= join `;] :Assignment commit - ragel::assignment :Assignment - { - InputLoc loc = &$def_name->loc; - - bool exportMachine = $opt_export->isSet; - if ( exportMachine ) - exportContext.append( true ); - - string name( $def_name->tok.data, $def_name->tok.length ); - - /* Main machine must be an instance. */ - bool isInstance = false; - if ( name == MAIN_MACHINE ) { - pd->id->warning(loc) << "main machine will be implicitly instantiated" << endl; - isInstance = true; - } - - MachineDef *machineDef = new MachineDef( $join->join ); - - /* Generic creation of machine for instantiation and assignment. */ - tryMachineDef( loc, name, machineDef, isInstance ); - - if ( exportMachine ) - exportContext.remove( exportContext.length()-1 ); - - /* Pass a location to join_or_lm */ - if ( machineDef->join != 0 ) - machineDef->join->loc = loc; - } - - # def instantiation - # [opt_export def_name `:= lm `;] :Instantiation commit - ragel::instantiation :Instantiation - { - InputLoc loc = &$def_name->loc; - - bool exportMachine = $opt_export->isSet; - if ( exportMachine ) - exportContext.append( true ); - - string name( $def_name->tok.data, $def_name->tok.length ); - - MachineDef *machineDef = $lm->machineDef; - - /* Generic creation of machine for instantiation and assignment. */ - tryMachineDef( loc, name, machineDef, true ); - - if ( exportMachine ) - exportContext.remove( exportContext.length()-1 ); - - /* Pass a location to join_or_lm */ - if ( machineDef->join != 0 ) - machineDef->join->loc = loc; - } - - # def def_name - # [word] :Word - ragel::def_name - { - RedToken tok; - colm_location loc; - } - - ragel::def_name :Word - { - string data( $word->data, $word->length ); - $$->tok.set( $word, @word ); - $$->loc = *@1; - - /* Make/get the priority key. The name may have already been referenced - * and therefore exist. 
*/ - PriorDictEl *priorDictEl; - if ( pd->priorDict.insert( data, pd->fsmCtx->nextPriorKey, &priorDictEl ) ) - pd->fsmCtx->nextPriorKey += 1; - pd->curDefPriorKey = priorDictEl->value; - - /* Make/get the local error key. */ - LocalErrDictEl *localErrDictEl; - if ( pd->localErrDict.insert( data, pd->nextLocalErrKey, &localErrDictEl ) ) - pd->nextLocalErrKey += 1; - pd->curDefLocalErrKey = localErrDictEl->value; - } - - # def nfa_union - # [def_name `|= nfa_rounds nfa_expr `;] :NfaUnion commit - ragel::nfa_union :NfaUnion - { - InputLoc loc = &$def_name->loc; - string name( $def_name->tok.data, $def_name->tok.length ); - - $nfa_expr->nfaUnion->roundsList = $nfa_rounds->roundsList; - - MachineDef *machineDef = new MachineDef( $nfa_expr->nfaUnion ); - - /* Generic creation of machine for instantiation and assignment. */ - tryMachineDef( loc, name, machineDef, true ); - } - - # def action_spec - # [`action word action_params action_block] :ActionSpecParams commit - # | [`action word action_block] :ActionSpec commit - ragel::action_spec - { - Action *action; - } - - ragel::action_spec :ActionSpecParams - { - string data( $word->data, $word->length ); - if ( pd->actionDict.find( data ) ) { - /* Recover by just ignoring the duplicate. */ - pd->id->error(@word) << "action \"" << data << "\" already defined" << endl; - } - else { - /* Add the action to the list of actions. */ - Action *newAction = new Action( &$action_block->loc, data, - $action_block->inlineList, pd->fsmCtx->nextCondId++ ); - - /* Insert to list and dict. */ - pd->fsmCtx->actionList.append( newAction ); - pd->actionDict.insert( newAction ); - - newAction->paramList = $action_params->paramList; - if ( $action_params->paramList != 0 ) - newAction->argListMap = new ActionArgListMap; - } - } - - ragel::action_spec :ActionSpec - { - string data( $word->data, $word->length ); - if ( pd->actionDict.find( data ) ) { - /* Recover by just ignoring the duplicate. 
*/ - pd->id->error(@word) << "action \"" << data << "\" already defined" << endl; - } - else { - /* Add the action to the list of actions. */ - Action *newAction = new Action( &$action_block->loc, data, - $action_block->inlineList, pd->fsmCtx->nextCondId++ ); - - /* Insert to list and dict. */ - pd->fsmCtx->actionList.append( newAction ); - pd->actionDict.insert( newAction ); - } - } - - # def statement - # | [`prepush action_block] :PrePush commit - # | [`postpop action_block] :PostPop commit - ragel::statement :PrePush - { - if ( pd->fsmCtx->prePushExpr != 0 ) { - /* Recover by just ignoring the duplicate. */ - pd->id->error(@1) << "prepush code already defined" << endl; - } - pd->fsmCtx->prePushExpr = new InlineBlock( @1, $action_block->inlineList ); - - } - ragel::statement :PostPop - { - if ( pd->fsmCtx->postPopExpr != 0 ) { - /* Recover by just ignoring the duplicate. */ - pd->id->error(@1) << "postpop code already defined" << endl; - } - pd->fsmCtx->postPopExpr = new InlineBlock( @1, $action_block->inlineList ); - } - - # def statement - # [`nfaprepush action_block] :NfaPrePush commit - ragel::statement :NfaPrePush - { - if ( pd->fsmCtx->nfaPrePushExpr != 0 ) { - /* Recover by just ignoring the duplicate. */ - pd->id->error(@1) << "nfa_pre_push code already defined" << endl; - } - - pd->fsmCtx->nfaPrePushExpr = new InlineBlock( @1, $action_block->inlineList ); - } - - # def statement - # [`nfapostpop action_block] :NfaPostPop commit - ragel::statement :NfaPostPop - { - if ( pd->fsmCtx->nfaPostPopExpr != 0 ) { - /* Recover by just ignoring the duplicate. 
*/ - pd->id->error(@1) << "nfa_post_pop code already defined" << endl; - } - - pd->fsmCtx->nfaPostPopExpr = new InlineBlock( @1, $action_block->inlineList ); - } - - # def statement - # | [`variable variable_name inline_expr_reparse] :Variable commit - # | [`access inline_expr_reparse] :Access commit - ragel::statement :Variable - { - string data( $variable_name->data, $variable_name->length ); - bool wasSet = pd->setVariable( data.c_str(), - $inline_expr_reparse->inlineList ); - if ( !wasSet ) - pd->id->error(@1) << "bad variable name: " << $variable_name->data << endl; - } - - ragel::statement :Access - { - pd->fsmCtx->accessExpr = $inline_expr_reparse->inlineList; - } - - # def statement - # | [`write Cmd: word ArgList: write_arg* `;] :Write commit - ragel::statement :Write - { - if ( !isImport && includeDepth == 0 ) { - { - InputItem *inputItem = new InputItem; - inputItem->type = InputItem::Write; - inputItem->loc = @Cmd; - inputItem->name = section->sectionName; - inputItem->section = section; - - id->inputItems.append( inputItem ); - } - id->curItem = id->curItem->next; - InputItem *inputItem = id->curItem; - - string cmd( $Cmd->data, $Cmd->length ); - inputItem->writeArgs.push_back( cmd ); - inputItem->writeArgs.insert( inputItem->writeArgs.end(), writeArgs.begin(), writeArgs.end() ); - - inputItem->pd = pd; - } - - /* Clear the write args collector. */ - writeArgs.clear(); - } - - # def alphtype_type - # [W1: word] :One - # | [W1: word W2: word] :Two - ragel::alphtype_type :One - { - string one( $W1->data, $W1->length ); - if ( ! pd->setAlphType( @W1, hostLang, one.c_str() ) ) { - // Recover by ignoring the alphtype statement. - pd->id->error(@W1) << "\"" << one << - "\" is not a valid alphabet type" << endl; - } - } - - ragel::alphtype_type :Two - { - string one( $W1->data, $W1->length ); - string two( $W2->data, $W2->length ); - if ( ! pd->setAlphType( @W1, hostLang, one.c_str(), two.c_str() ) ) { - // Recover by ignoring the alphtype statement. 
- pd->id->error(@W1) << "\"" << one << - "\" is not a valid alphabet type" << endl; - } - } - - # def statement - # | [`getkey inline_expr_reparse] :GetKey commit - ragel::statement :GetKey - { - pd->fsmCtx->getKeyExpr = $inline_expr_reparse->inlineList; - } - - ragel::open_inc :OpenInc - { - includeDepth += 1; - } - - ragel::close_inc :CloseInc - { - includeDepth -= 1; - } - - ragel::open_imp :OpenImp - { - isImport = true; - } - - ragel::close_imp :CloseImp - { - isImport = false; - } - - # def join - # [join `, expression] :Rec - # | [expression] :Base - ragel::join - { - Join *join; - } - - ragel::join :Rec - { - $$->join = $_join->join; - $$->join->exprList.append( $expression->expr ); - } - - ragel::join :Base - { - $$->join = new Join( $expression->expr ); - } - - # def expression - # [expr_left expression_op_list] :Expression - ragel::expression - { - Expression *expr; - } - - ragel::expression :Expression - { - // 1. reverse the list - // 2. put the new term at the end. - Expression *prev = new Expression( $expr_left->term ); - Expression *cur = $expression_op_list->expr; - while ( cur != 0 ) { - Expression *next = cur->expression; - - /* Reverse. 
*/ - cur->expression = prev; - - prev = cur; - cur = next; - } - - $$->expr = prev; - } - - ragel::expr_left - { - Term *term; - } - - ragel::expr_left :Term - { - $$->term = $term->term; - } - - # def expression_op_list - # [expression_op expression_op_list] :Op - # | [] :Empty - ragel::expression_op_list - { - Expression *expr; - } - - ragel::expression_op_list :Op - { - $$->expr = new Expression( $_expression_op_list->expr, - $expression_op->term, $expression_op->type ); - } - - ragel::expression_op_list :Empty - { - $$->expr = 0; - } - - - # def expression_op - # [`| term] :Or - # | [`& term] :And - # | [`- term] :Sub - # | [`-- term] :Ssub - ragel::expression_op - { - Expression::Type type; - Term *term; - } - - ragel::expression_op :Or - { - $$->type = Expression::OrType; - $$->term = $term->term; - } - - ragel::expression_op :And - { - $$->type = Expression::IntersectType; - $$->term = $term->term; - } - - ragel::expression_op :Sub - { - $$->type = Expression::SubtractType; - $$->term = $term->term; - } - - ragel::expression_op :Ssub - { - $$->type = Expression::StrongSubtractType; - $$->term = $term->term; - } - - - # def term - # [term_left term_op_list_short] :Term - ragel::term - { - Term *term; - } - - ragel::term :Term - { - // 1. reverse the list - // 2. put the new term at the end. - Term *prev = new Term( $term_left->fwa ); - Term *cur = $term_op_list_short->term; - while ( cur != 0 ) { - Term *next = cur->term; - - /* Reverse. */ - cur->term = prev; - - prev = cur; - cur = next; - } - - $$->term = prev; - } - - # def term_left - # [factor_label] :FactorLabel - ragel::term_left - { - FactorWithAug *fwa; - } - - ragel::term_left :FactorLabel - { - $$->fwa = $factor_label->fwa; - } - - # # This list is done manually to get shortest match. 
- # def term_op_list_short - # [] :Empty - # | [term_op term_op_list_short] :Terms - ragel::term_op_list_short - { - Term *term; - } - - ragel::term_op_list_short :Empty - { - $$->term = 0; - } - - ragel::term_op_list_short :Terms - { - $$->term = new Term( $_term_op_list_short->term, - $term_op->fwa, $term_op->type ); - } - - - # def term_op - # [factor_label] :None - # | [`. factor_label] :Dot - # | [`:> factor_label] :ColonLt - # | [`:>> factor_label] :ColonLtLt - # | [`<: factor_label] :GtColon - ragel::term_op - { - Term::Type type; - FactorWithAug *fwa; - } - - ragel::term_op :None - { - $$->type = Term::ConcatType; - $$->fwa = $factor_label->fwa; - } - - ragel::term_op :Dot - { - $$->type = Term::ConcatType; - $$->fwa = $factor_label->fwa; - } - - ragel::term_op :ColonLt - { - $$->type = Term::RightStartType; - $$->fwa = $factor_label->fwa; - } - - ragel::term_op :ColonLtLt - { - $$->type = Term::RightFinishType; - $$->fwa = $factor_label->fwa; - } - - ragel::term_op :GtColon - { - $$->type = Term::LeftType; - $$->fwa = $factor_label->fwa; - } - - # def factor_label - # [word `: factor_label] :Label - # | [factor_ep] :Ep - ragel::factor_label - { - FactorWithAug *fwa; - } - - ragel::factor_label :Label - { - $$->fwa = $_factor_label->fwa; - - InputLoc loc = @word; - string label( $word->data, $word->length ); - - $$->fwa->labels.insert( $$->fwa->labels.begin(), Label(loc, label) ); - - if ( pd->id->isBreadthLabel( label ) ) - $$->fwa->labels[0].cut = true; - } - - ragel::factor_label :Ep - { - $$->fwa = $factor_ep->fwa; - } - - # def factor_ep - # [factor_aug `-> epsilon_target] :Epsilon - # | [factor_aug] :Base - ragel::factor_ep - { - FactorWithAug *fwa; - } - - ragel::factor_ep :Epsilon - { - $$->fwa = $factor_aug->fwa; - $1->fwa->epsilonLinks.append( EpsilonLink( @2, $epsilon_target->nameRef ) ); - } - - ragel::factor_ep :Base - { - $$->fwa = $factor_aug->fwa; - } - - # def epsilon_target - # [epsilon_target `:: word] :Rec - # | [word] :Base - 
ragel::epsilon_target - { - NameRef *nameRef; - } - - ragel::epsilon_target :Rec - { - $$->nameRef = $_epsilon_target->nameRef; - $$->nameRef->append( string( $word->data, $word->length ) ); - } - - ragel::epsilon_target :Base - { - $$->nameRef = new NameRef; - $$->nameRef->append( string( $word->data, $word->length ) ); - } - - # def named_action_ref - # [word] :Plain - # | [word `( opt_action_arg_list `)] :Args - ragel::named_action_ref - { - Action *action; - } - - ragel::named_action_ref :Plain - { - /* Set the name in the actionDict. */ - string data( $word->data, $word->length ); - Action *action = pd->actionDict.find( data ); - if ( action != 0 ) { - if ( action->paramList != 0 ) - pd->id->error(@word) << "expecting no action args for " << data << endp; - - /* Pass up the action element */ - $$->action = action; - } - else { - /* Will recover by returning null as the action. */ - pd->id->error(@word) << "action lookup of \"" << data << "\" failed" << endl; - $$->action = 0; - } - } - - ragel::named_action_ref :Args - { - /* Set the name in the actionDict. */ - string data( $word->data, $word->length ); - Action *action = pd->actionDict.find( data ); - if ( action != 0 ) { - if ( action->paramList == 0 ) - pd->id->error(@word) << "expecting action args" << endp; - - /* Pass up the action element */ - $$->action = action; - } - else { - /* Will recover by returning null as the action. */ - pd->id->error(@word) << "action lookup of \"" << data << "\" failed" << endl; - $$->action = 0; - } - - if ( $$->action != 0 ) { - ActionArgList *argList = $opt_action_arg_list->argList; - ActionParamList *paramList = action->paramList; - - /* Make sure the number of actions line up. */ - if ( argList->length() != paramList->length() ) { - pd->id->error(@1) << "wrong number of action " - "arguments for \"" << data << "\"" << endl; - } - - /* Now we need to specialize using the supplied args. We can only - * present an Action* to fsmcodegen. 
*/ - ActionArgListMapEl *el = action->argListMap->find( argList ); - if ( el == 0 ) { - /* Allocate an action representing this specialization. */ - Action *specAction = Action::cons( @1, action, - argList, pd->fsmCtx->nextCondId++ ); - pd->fsmCtx->actionList.append( specAction ); - - el = action->argListMap->insert( argList, specAction ); - } - else { - /* Can delete $3->arg list. */ - delete $opt_action_arg_list->argList; - } - - $$->action = el->value; - } - } - - # def action_arg_list - # [action_arg_list `, action_ref] :Rec - # | [action_ref] :Base - ragel::action_arg_list - { - ActionArgList *argList; - } - - ragel::action_arg_list :Rec - { - $$->argList = $_action_arg_list->argList; - $$->argList->append( $action_ref->action ); - } - - ragel::action_arg_list :Base - { - $$->argList = new ActionArgList; - $$->argList->append( $action_ref->action ); - } - - # def opt_action_arg_list - # [action_arg_list] :List - # | [] :Empty - ragel::opt_action_arg_list - { - ActionArgList *argList; - } - - ragel::opt_action_arg_list :List - { - $$->argList = $action_arg_list->argList; - } - - ragel::opt_action_arg_list :Empty - { - $$->argList = new ActionArgList; - } - - # def action_ref - # [named_action_ref] :NamedRef - # | [`( named_action_ref `)] :ParenNamed - # | [action_block] :Block - ragel::action_ref - { - Action *action; - } - - ragel::action_ref :NamedRef - { - $$->action = $named_action_ref->action; - } - - ragel::action_ref :ParenNamed - { - $$->action = $named_action_ref->action; - } - - ragel::action_ref :Block - { - /* Create the action, add it to the list and pass up. 
*/ - Action *newAction = new Action( &$action_block->loc, std::string(), - $action_block->inlineList, pd->fsmCtx->nextCondId++ ); - pd->fsmCtx->actionList.append( newAction ); - $$->action = newAction; - } - - # def action_params - # [`( opt_action_param_list `)] - ragel::action_params - { - ActionParamList *paramList; - } - - ragel::action_params :List - { - $$->paramList = $opt_action_param_list->paramList; - paramList = $2->paramList; - } - - # def opt_action_param_list - # [action_param_list] :List - # | [] :Empty - ragel::opt_action_param_list - { - ActionParamList *paramList; - } - - ragel::opt_action_param_list :List - { - $$->paramList = $action_param_list->paramList; - } - - ragel::opt_action_param_list :Empty - { - $$->paramList = new ActionParamList; - } - - # def action_param - # [word] - ragel::action_param - { - ActionParam *param; - } - - ragel::action_param :Word - { - string param( $word->data, $word->length ); - $$->param = new ActionParam( param ); - } - - # def action_param_list - # [action_param_list `, action_param] - # | [action_param] - ragel::action_param_list - { - ActionParamList *paramList; - } - - ragel::action_param_list :Rec - { - $$->paramList = $_action_param_list->paramList; - $$->paramList->append( $action_param->param ); - } - - ragel::action_param_list :Base - { - $$->paramList = new ActionParamList; - $$->paramList->append( $action_param->param ); - } - - - # def action_block - # [`{ c_select CInlineBlock: inline::inline_block `}] :C - # | [`{ ruby_select RubyInlineBlock: ruby_inline::inline_block ruby_inline::`}] - # | [`{ ocaml_select OCamlInlineBlock: ocaml_inline::inline_block ocaml_inline::`}] - # | [`{ crack_select OCamlInlineBlock: crack_inline::inline_block crack_inline::`}] - ragel::action_block - { - colm_location loc; - InlineList *inlineList; - } - - ragel::action_block :ActionBlock - { - $$->loc = *@1; - $$->inlineList = $CInlineBlock->inlineList; - } - - # def inline_expr_reparse - # [_inline_expr_reparse] 
:Reparse - # | [action_expr] :ActionExpr - ragel::inline_expr_reparse - { - InlineList *inlineList; - } - ragel::inline_expr_reparse :ActionExpr - { - $$->inlineList = $action_expr->inlineList; - } - - # def action_expr - # [`{ c_select CInlineExpr: inline::inline_expr `}] :C - # | [`{ ruby_select RubyInlineExpr: ruby_inline::inline_expr ruby_inline::`}] - # | [`{ ocaml_select OCamlInlineExpr: ocaml_inline::inline_expr ocaml_inline::`}] - # | [`{ crack_select CrackInlineExpr: crack_inline::inline_expr crack_inline::`}] - ragel::action_expr - { - colm_location loc; - InlineList *inlineList; - } - - ragel::action_expr :ActionExpr - { - $$->loc = *@1; - $$->inlineList = $CInlineExpr->inlineList; - } - - # def state_ref - # [opt_name_sep state_ref_names] :Ref - state_ref::state_ref - { - NameRef *nameRef; - } - - state_ref::state_ref :Ref - { - $$->nameRef = $state_ref_names->nameRef; - if ( $opt_name_sep->nameSep ) - $$->nameRef->prepend( "" ); - } - - # def opt_name_sep - # [srlex::`::] :ColonColon - # | [] :Empty - state_ref::opt_name_sep - { - bool nameSep; - } - - state_ref::opt_name_sep :ColonColon - { - $$->nameSep = true; - } - - state_ref::opt_name_sep :Empty - { - $$->nameSep = false; - } - - # def state_ref_names - # [state_ref_names srlex::`:: srlex::word] :Rec - # | [srlex::word] :Base - state_ref::state_ref_names - { - NameRef *nameRef; - } - - state_ref::state_ref_names :Rec - { - $$->nameRef = $_state_ref_names->nameRef; - $$->nameRef->append( string( $word->data, $word->length ) ); - } - - state_ref::state_ref_names :Base - { - $$->nameRef = new NameRef; - $$->nameRef->append( string( $word->data, $word->length ) ); - } - - # def priority_aug - # [uint] :NoSign - # | [`+ uint] :Plus - # | [`- uint] :Minus - ragel::priority_aug - { - int priorityNum; - } - ragel::priority_aug :NoSign - { - string data( $uint->data, $uint->length ); - $$->priorityNum = tryLongScan( @1, data.c_str() ); - } - ragel::priority_aug :Plus - { - string data( $uint->data, 
$uint->length ); - $$->priorityNum = tryLongScan( @1, data.c_str() ); - } - ragel::priority_aug :Minus - { - string data( $uint->data, $uint->length ); - $$->priorityNum = -1 * tryLongScan( @1, data.c_str() ); - } - - #def priority_name - # [word] :Word - - ragel::priority_name - { - int priorityName; - } - - ragel::priority_name :Word - { - string data( $word->data, $word->length ); - - // Lookup/create the priority key. - PriorDictEl *priorDictEl; - if ( pd->priorDict.insert( data, pd->fsmCtx->nextPriorKey, &priorDictEl ) ) - pd->fsmCtx->nextPriorKey += 1; - - // Use the inserted/found priority key. - $$->priorityName = priorDictEl->value; - } - - # def error_name - # [word] :Word - ragel::error_name - { - int errName; - } - - ragel::error_name :Word - { - string data( $word->data, $word->length ); - /* Lookup/create the priority key. */ - LocalErrDictEl *localErrDictEl; - if ( pd->localErrDict.insert( data, pd->nextLocalErrKey, &localErrDictEl ) ) - pd->nextLocalErrKey += 1; - - /* Use the inserted/found priority key. */ - $$->errName = localErrDictEl->value; - } - - # def aug_base - # [`@] :Finish | [`>] :Enter | [`%] :Leave | [`$] :All - - ragel::aug_base - { - colm_location loc; - AugType augType; - } - - ragel::aug_base :Enter - { $$->loc = *@1; $$->augType = at_start; } - ragel::aug_base :All - { $$->loc = *@1; $$->augType = at_all; } - ragel::aug_base :Finish - { $$->loc = *@1; $$->augType = at_finish; } - ragel::aug_base :Leave - { $$->loc = *@1; $$->augType = at_leave; } - - # def aug_cond - # [`>?] :Start1 | [`$?] :All1 | [`%?] 
:Leave1 - # | [`> `when] :Start2 | [`$ `when] :All2 | [`% `when] :Leave2 - # | [`inwhen] :Start3 | [`when] :All3 | [`outwhen] :Leave3 - - ragel::aug_cond - { - colm_location loc; - AugType augType; - } - - ragel::aug_cond :Start1 - { $$->loc = *@1; $$->augType = at_start; } - ragel::aug_cond :Start2 - { $$->loc = *@1; $$->augType = at_start; } - ragel::aug_cond :Start3 - { $$->loc = *@1; $$->augType = at_start; } - ragel::aug_cond :All1 - { $$->loc = *@1; $$->augType = at_all; } - ragel::aug_cond :All2 - { $$->loc = *@1; $$->augType = at_all; } - ragel::aug_cond :All3 - { $$->loc = *@1; $$->augType = at_all; } - ragel::aug_cond :Leave1 - { $$->loc = *@1; $$->augType = at_leave; } - ragel::aug_cond :Leave2 - { $$->loc = *@1; $$->augType = at_leave; } - ragel::aug_cond :Leave3 - { $$->loc = *@1; $$->augType = at_leave; } - - # def aug_to_state - # [`>~] :Start1 | [`<~] :NotStart1 | [`$~] :All1 - # | [`%~] :Final1 | [`@~] :NotFinal1 | [`<>~] :Middle1 - # | [`> `to] :Start2 | [`< `to] :NotStart2 | [`$ `to] :All2 - # | [`% `to] :Final2 | [`@ `to] :NotFinal2 | [`<> `to] :Middle2 - - ragel::aug_to_state - { - colm_location loc; - AugType augType; - } - - ragel::aug_to_state :Start1 - { $$->loc = *@1; $$->augType = at_start_to_state; } - ragel::aug_to_state :Start2 - { $$->loc = *@1; $$->augType = at_start_to_state; } - ragel::aug_to_state :NotStart1 - { $$->loc = *@1; $$->augType = at_not_start_to_state; } - ragel::aug_to_state :NotStart2 - { $$->loc = *@1; $$->augType = at_not_start_to_state; } - ragel::aug_to_state :All1 - { $$->loc = *@1; $$->augType = at_all_to_state; } - ragel::aug_to_state :All2 - { $$->loc = *@1; $$->augType = at_all_to_state; } - ragel::aug_to_state :Final1 - { $$->loc = *@1; $$->augType = at_final_to_state; } - ragel::aug_to_state :Final2 - { $$->loc = *@1; $$->augType = at_final_to_state; } - ragel::aug_to_state :NotFinal1 - { $$->loc = *@1; $$->augType = at_not_final_to_state; } - ragel::aug_to_state :NotFinal2 - { $$->loc = *@1; $$->augType = 
at_not_final_to_state; } - ragel::aug_to_state :Middle1 - { $$->loc = *@1; $$->augType = at_middle_to_state; } - ragel::aug_to_state :Middle2 - { $$->loc = *@1; $$->augType = at_middle_to_state; } - - # def aug_from_state - # [`>*] :Start1 | [`<*] :NotStart1 | [`$*] :All1 - # | [`%*] :Final1 | [`@*] :NotFinal1 | [`<>*] :Middle1 - # | [`> `from] :Start2 | [`< `from] :NotStart2 | [`$ `from] :All2 - # | [`% `from] :Final2 | [`@ `from] :NotFinal2 | [`<> `from] :Middle2 - - ragel::aug_from_state - { - colm_location loc; - AugType augType; - } - - ragel::aug_from_state :Start1 - { $$->loc = *@1; $$->augType = at_start_from_state; } - ragel::aug_from_state :Start2 - { $$->loc = *@1; $$->augType = at_start_from_state; } - ragel::aug_from_state :NotStart1 - { $$->loc = *@1; $$->augType = at_not_start_from_state; } - ragel::aug_from_state :NotStart2 - { $$->loc = *@1; $$->augType = at_not_start_from_state; } - ragel::aug_from_state :All1 - { $$->loc = *@1; $$->augType = at_all_from_state; } - ragel::aug_from_state :All2 - { $$->loc = *@1; $$->augType = at_all_from_state; } - ragel::aug_from_state :Final1 - { $$->loc = *@1; $$->augType = at_final_from_state; } - ragel::aug_from_state :Final2 - { $$->loc = *@1; $$->augType = at_final_from_state; } - ragel::aug_from_state :NotFinal1 - { $$->loc = *@1; $$->augType = at_not_final_from_state; } - ragel::aug_from_state :NotFinal2 - { $$->loc = *@1; $$->augType = at_not_final_from_state; } - ragel::aug_from_state :Middle1 - { $$->loc = *@1; $$->augType = at_middle_from_state; } - ragel::aug_from_state :Middle2 - { $$->loc = *@1; $$->augType = at_middle_from_state; } - - # def aug_eof - # [`>/] :Start1 | [`/] :Middle1 - # | [`> `eof] :Start2 | [`< `eof] :NotStart2 | [`$ `eof] :All2 - # | [`% `eof] :Final2 | [`@ `eof] :NotFinal2 | [`<> `eof] :Middle2 - - ragel::aug_eof - { - colm_location loc; - AugType augType; - } - - ragel::aug_eof :Start1 - { $$->loc = *@1; $$->augType = at_start_eof; } - ragel::aug_eof :Start2 - { $$->loc = *@1; 
$$->augType = at_start_eof; } - ragel::aug_eof :NotStart1 - { $$->loc = *@1; $$->augType = at_not_start_eof; } - ragel::aug_eof :NotStart2 - { $$->loc = *@1; $$->augType = at_not_start_eof; } - ragel::aug_eof :All1 - { $$->loc = *@1; $$->augType = at_all_eof; } - ragel::aug_eof :All2 - { $$->loc = *@1; $$->augType = at_all_eof; } - ragel::aug_eof :Final1 - { $$->loc = *@1; $$->augType = at_final_eof; } - ragel::aug_eof :Final2 - { $$->loc = *@1; $$->augType = at_final_eof; } - ragel::aug_eof :NotFinal1 - { $$->loc = *@1; $$->augType = at_not_final_eof; } - ragel::aug_eof :NotFinal2 - { $$->loc = *@1; $$->augType = at_not_final_eof; } - ragel::aug_eof :Middle1 - { $$->loc = *@1; $$->augType = at_middle_eof; } - ragel::aug_eof :Middle2 - { $$->loc = *@1; $$->augType = at_middle_eof; } - - # def aug_gbl_error - # [`>!] :Start1 | [`!] :Middle1 - # | [`> `err] :Start2 | [`< `err] :NotStart2 | [`$ `err] :All2 - # | [`% `err] :Final2 | [`@ `err] :NotFinal2 | [`<> `err] :Middle2 - - ragel::aug_gbl_error - { - colm_location loc; - AugType augType; - } - - ragel::aug_gbl_error :Start1 - { $$->loc = *@1; $$->augType = at_start_gbl_error; } - ragel::aug_gbl_error :Start2 - { $$->loc = *@1; $$->augType = at_start_gbl_error; } - ragel::aug_gbl_error :NotStart1 - { $$->loc = *@1; $$->augType = at_not_start_gbl_error; } - ragel::aug_gbl_error :NotStart2 - { $$->loc = *@1; $$->augType = at_not_start_gbl_error; } - ragel::aug_gbl_error :NotStart2 - { $$->loc = *@1; $$->augType = at_not_start_gbl_error; } - ragel::aug_gbl_error :All1 - { $$->loc = *@1; $$->augType = at_all_gbl_error; } - ragel::aug_gbl_error :All2 - { $$->loc = *@1; $$->augType = at_all_gbl_error; } - ragel::aug_gbl_error :Final1 - { $$->loc = *@1; $$->augType = at_final_gbl_error; } - ragel::aug_gbl_error :Final2 - { $$->loc = *@1; $$->augType = at_final_gbl_error; } - ragel::aug_gbl_error :NotFinal1 - { $$->loc = *@1; $$->augType = at_not_final_gbl_error; } - ragel::aug_gbl_error :NotFinal2 - { $$->loc = *@1; 
$$->augType = at_not_final_gbl_error; } - ragel::aug_gbl_error :Middle1 - { $$->loc = *@1; $$->augType = at_middle_gbl_error; } - ragel::aug_gbl_error :Middle2 - { $$->loc = *@1; $$->augType = at_middle_gbl_error; } - - # def aug_local_error - # [`>^] :Start1 | [`<^] :NotStart1 | [`$^] :All1 - # | [`%^] :Final1 | [`@^] :NotFinal1 | [`<>^] :Middle1 - # | [`> `lerr] :Start2 | [`< `lerr] :NotStart2 | [`$ `lerr] :All2 - # | [`% `lerr] :Final2 | [`@ `lerr] :NotFinal2 | [`<> `lerr] :Middle2 - - ragel::aug_local_error - { - colm_location loc; - AugType augType; - } - - ragel::aug_local_error :Start1 - { $$->loc = *@1; $$->augType = at_start_local_error; } - - ragel::aug_local_error :Start2 - { $$->loc = *@1; $$->augType = at_start_local_error; } - - ragel::aug_local_error :NotStart1 - { $$->loc = *@1; $$->augType = at_not_start_local_error; } - - ragel::aug_local_error :NotStart2 - { $$->loc = *@1; $$->augType = at_not_start_local_error; } - - ragel::aug_local_error :All1 - { $$->loc = *@1; $$->augType = at_all_local_error; } - - ragel::aug_local_error :All2 - { $$->loc = *@1; $$->augType = at_all_local_error; } - - ragel::aug_local_error :Final1 - { $$->loc = *@1; $$->augType = at_final_local_error; } - - ragel::aug_local_error :Final2 - { $$->loc = *@1; $$->augType = at_final_local_error; } - - ragel::aug_local_error :NotFinal1 - { $$->loc = *@1; $$->augType = at_not_final_local_error; } - - ragel::aug_local_error :NotFinal2 - { $$->loc = *@1; $$->augType = at_not_final_local_error; } - - ragel::aug_local_error :Middle1 - { $$->loc = *@1; $$->augType = at_middle_local_error; } - - ragel::aug_local_error :Middle2 - { $$->loc = *@1; $$->augType = at_middle_local_error; } - - # def factor_aug - # [factor_aug aug_base action_ref] :ActionRef - # | [factor_aug aug_base priority_aug] :PriorEmbed - # | [factor_aug aug_base `( priority_name `, priority_aug `)] :NamedPriorEmbed - # | [factor_aug aug_cond action_ref] :CondEmbed - # | [factor_aug aug_cond `! 
action_ref] :NegCondEmbed - # | [factor_aug aug_to_state action_ref] :ToStateAction - # | [factor_aug aug_from_state action_ref] :FromStateAction - # | [factor_aug aug_eof action_ref] :EofAction - # | [factor_aug aug_gbl_error action_ref] :GblErrorAction - # | [factor_aug aug_local_error action_ref] :LocalErrorDef - # | [factor_aug aug_local_error `( word `, action_ref `)] :LocalErrorName - # | [factor_rep] :Base - ragel::factor_aug - { - FactorWithAug *fwa; - } - - ragel::factor_aug :ActionRef - { - $$->fwa = $_factor_aug->fwa; - - /* Append the action to the factorWithAug, record the refernce from - * factorWithAug to the action and pass up the factorWithAug. */ - $$->fwa->actions.append( ParserAction( - &$aug_base->loc, $aug_base->augType, 0, $action_ref->action ) ); - } - - ragel::factor_aug :PriorEmbed - { - $$->fwa = $_factor_aug->fwa; - - $1->fwa->priorityAugs.append( PriorityAug( $aug_base->augType, - pd->curDefPriorKey, $priority_aug->priorityNum ) ); - } - - ragel::factor_aug :NamedPriorEmbed - { - $$->fwa = $_factor_aug->fwa; - - $1->fwa->priorityAugs.append( PriorityAug( $aug_base->augType, - $priority_name->priorityName, $priority_aug->priorityNum ) ); - } - - ragel::factor_aug :CondEmbed - { - $$->fwa = $_factor_aug->fwa; - - $$->fwa->conditions.append( ConditionTest( &$aug_cond->loc, - $aug_cond->augType, $action_ref->action, true ) ); - } - - ragel::factor_aug :NegCondEmbed - { - $$->fwa = $_factor_aug->fwa; - - $$->fwa->conditions.append( ConditionTest( &$aug_cond->loc, - $aug_cond->augType, $action_ref->action, false ) ); - } - - ragel::factor_aug :ToStateAction - { - $$->fwa = $_factor_aug->fwa; - - $$->fwa->actions.append( ParserAction( &$aug_to_state->loc, - $aug_to_state->augType, 0, $action_ref->action ) ); - } - - ragel::factor_aug :FromStateAction - { - $$->fwa = $_factor_aug->fwa; - - $$->fwa->actions.append( ParserAction( &$aug_from_state->loc, - $aug_from_state->augType, 0, $action_ref->action ) ); - } - - ragel::factor_aug :EofAction - 
{ - $$->fwa = $_factor_aug->fwa; - $1->fwa->actions.append( ParserAction( &$aug_eof->loc, - $aug_eof->augType, 0, $action_ref->action ) ); - } - - ragel::factor_aug :GblErrorAction - { - $$->fwa = $_factor_aug->fwa; - - $1->fwa->actions.append( ParserAction( &$aug_gbl_error->loc, - $aug_gbl_error->augType, pd->curDefLocalErrKey, $action_ref->action ) ); - } - - ragel::factor_aug :LocalErrorDef - { - $$->fwa = $_factor_aug->fwa; - - $$->fwa->actions.append( ParserAction( &$aug_local_error->loc, - $aug_local_error->augType, pd->curDefLocalErrKey, $action_ref->action ) ); - } - - ragel::factor_aug :LocalErrorName - { - $$->fwa = $_factor_aug->fwa; - - $$->fwa->actions.append( ParserAction( &$aug_local_error->loc, - $aug_local_error->augType, $error_name->errName, $action_ref->action ) ); - } - - ragel::factor_aug :Base - { - $$->fwa = new FactorWithAug( $factor_rep->rep ); - } - - #def factor_rep - # [factor_neg factor_rep_op_list] :Op - - ragel::factor_rep - { - FactorWithRep *rep; - } - - ragel::factor_rep :Op - { - FactorWithRep *prev = new FactorWithRep( $factor_neg->neg ); - FactorWithRep *cur = $factor_rep_op_list->rep; - while ( cur != 0 ) { - FactorWithRep *next = cur->factorWithRep; - - /* Reverse. */ - cur->factorWithRep = prev; - - prev = cur; - cur = next; - } - - $$->rep = prev; - } - - - # def factor_rep_op_list - # [factor_rep_op factor_rep_op_list] - # | [] - ragel::factor_rep_op_list - { - FactorWithRep *rep; - } - ragel::factor_rep_op_list :Rec - { - $$->rep = $factor_rep_op->rep; - $$->rep->factorWithRep = $_factor_rep_op_list->rep; - } - ragel::factor_rep_op_list :Base - { - $$->rep = 0; - } - - # def factor_rep_op - # [`*] :Star - # | [`**] :StarStar - # | [`?] 
:Optional - # | [`+] :Plus - # | [`{ factor_rep_num `}] :ExactRep - # | [`{ `, factor_rep_num `}] :MaxRep - # | [`{ factor_rep_num `, `}] :MinRep - # | [`{ LowRep: factor_rep_num `, HighRep: factor_rep_num `}] :RangeRep - ragel::factor_rep_op - { - FactorWithRep *rep; - } - ragel::factor_rep_op :Star - { - $$->rep = new FactorWithRep( @1, 0, 0, 0, FactorWithRep::StarType ); - } - ragel::factor_rep_op :StarStar - { - $$->rep = new FactorWithRep( @1, 0, 0, 0, FactorWithRep::StarStarType ); - } - ragel::factor_rep_op :Optional - { - $$->rep = new FactorWithRep( @1, 0, 0, 0, FactorWithRep::OptionalType ); - } - ragel::factor_rep_op :Plus - { - $$->rep = new FactorWithRep( @1, 0, 0, 0, FactorWithRep::PlusType ); - } - ragel::factor_rep_op :ExactRep - { - $$->rep = new FactorWithRep( @1, 0, - $factor_rep_num->rep, 0, - FactorWithRep::ExactType ); - } - ragel::factor_rep_op :MaxRep - { - $$->rep = new FactorWithRep( @1, 0, - 0, $factor_rep_num->rep, - FactorWithRep::MaxType ); - } - ragel::factor_rep_op :MinRep - { - $$->rep = new FactorWithRep( @1, 0, - $factor_rep_num->rep, 0, - FactorWithRep::MinType ); - } - ragel::factor_rep_op :RangeRep - { - $$->rep = new FactorWithRep( @1, 0, - $LowRep->rep, $HighRep->rep, - FactorWithRep::RangeType ); - } - - # def factor_rep_num - # [uint] - ragel::factor_rep_num - { - int rep; - } - - ragel::factor_rep_num :RepNum - { - // Convert the priority number to a long. Check for overflow. - string data( $uint->data, $uint->length ); - errno = 0; - long rep = strtol( data.c_str(), 0, 10 ); - if ( errno == ERANGE && rep == LONG_MAX ) { - // Repetition too large. Recover by returing repetition 1. */ - pd->id->error(@uint) << "repetition number " << data << " overflows" << endl; - $$->rep = 1; - } - else { - // Cannot be negative, so no overflow. - $$->rep = rep; - } - } - - - # def factor_neg - # [`! 
factor_neg] :Bang - # | [`^ factor_neg] :Caret - # | [factor] :Base - ragel::factor_neg - { - FactorWithNeg *neg; - } - - ragel::factor_neg :Bang - { - $$->neg = new FactorWithNeg( @1, - $_factor_neg->neg, FactorWithNeg::NegateType ); - } - - ragel::factor_neg :Caret - { - $$->neg = new FactorWithNeg( @1, - $_factor_neg->neg, FactorWithNeg::CharNegateType ); - } - - ragel::factor_neg :Base - { - $$->neg = new FactorWithNeg( $factor->factor ); - } - - # def opt_max_arg - # [`, action_ref] - ragel::opt_max_arg - { - Action *action; - } - - ragel::opt_max_arg :Action - { - $$->action = $action_ref->action; - } - - ragel::opt_max_arg :Empty - { - $$->action = 0; - } - - # - # :nfa - # - ragel::nfastar - { - Factor::NfaRepeatMode mode; - } - - ragel::nfastar :Default - { - $$->mode = Factor::NfaLegacy; - } - - ragel::nfastar :Lazy - { - $$->mode = Factor::NfaLazy; - } - - ragel::nfastar :Greedy - { - $$->mode = Factor::NfaGreedy; - } - - # - # :nfa_wrap - # - ragel::nfawrap - { - Factor::NfaRepeatMode mode; - } - - ragel::nfawrap :Default - { - $$->mode = Factor::NfaGreedy; - } - - ragel::nfawrap :Lazy - { - $$->mode = Factor::NfaLazy; - } - - ragel::nfawrap :Greedy - { - $$->mode = Factor::NfaGreedy; - } - - # - # :cond - # - ragel::colon_cond - { - Factor::Type type; - } - - ragel::colon_cond :Cond - { - $$->type = Factor::CondStar; - } - - ragel::colon_cond :CondStar - { - $$->type = Factor::CondStar; - } - - ragel::colon_cond :CondPlus - { - $$->type = Factor::CondPlus; - } - - - # def factor - # [alphabet_num] :AlphabetNum - # | [word] :Word - # | [string] :String - # | [lex_sqopen_pos reg_or_data re_or_sqclose] :PosOrBlock - # | [lex_sqopen_neg reg_or_data re_or_sqclose] :NegOrBlock - # | [lex_regex_open regex re_close] :Regex - # | [RL1: range_lit `.. 
RL2: range_lit] :Range - # | [RL1: range_lit `../i RL2: range_lit] :RangeIndep - # | [`:nfa `( uint `, expression `, - # Push: action_ref `, Pop: action_ref `, Init: action_ref `, Stay: action_ref `, - # Repeat: action_ref `, Exit: action_ref `):] :Nfa - # | [`:cond `( uint `, expression `, - # Init: action_ref `, Inc: action_ref `, Min: action_ref OptMax: opt_max_arg `):] :Cond - # | [`( join `)] :Join - ragel::factor - { - Factor *factor; - } - - ragel::factor :Join - { - /* Create a new factor going to a parenthesized join. */ - $$->factor = new Factor( $join->join ); - $$->factor->join->loc = @1; - } - - ragel::factor :AlphabetNum - { - $$->factor = new Factor( new Literal( $alphabet_num->tok.loc, - $alphabet_num->neg, $alphabet_num->tok.data, - $alphabet_num->tok.length, Literal::Number ) ); - } - - ragel::factor :Word - { - InputLoc loc = @word; - string s( $word->data, $word->length ); - - /* Find the named graph. */ - GraphDictEl *gdNode = pd->graphDict.find( s ); - if ( gdNode == 0 ) { - /* Recover by returning null as the factor node. */ - pd->id->error(loc) << "graph lookup of \"" << s << "\" failed" << endl; - $$->factor = 0; - } - else if ( gdNode->isInstance ) { - /* Recover by retuning null as the factor node. */ - pd->id->error(loc) << "references to graph instantiations not allowed " - "in expressions" << endl; - $$->factor = 0; - } - else { - /* Create a factor node that is a lookup of an expression. 
*/ - $$->factor = new Factor( loc, gdNode->value ); - } - } - - ragel::factor :String - { - $$->factor = new Factor( new Literal( @string, false, - $string->data, $string->length, Literal::LitString ) ); - } - - #ragel::factor :HexString - #{ - # $$->factor = new Factor( new Literal( @hex_string, false, - # $hex_string->data, $hex_string->length, Literal::HexString ) ); - #} - - ragel::factor :Range - { - $$->factor = new Factor( new Range( $RL1->literal, $RL2->literal, false ) ); - } - - ragel::factor :RangeIndep - { - $$->factor = new Factor( new Range( $RL1->literal, $RL2->literal, true ) ); - } - - # | [lex_sqopen_pos reg_or_data re_or_sqclose] :PosOrBlock - ragel::factor :PosOrBlock - { - $$->factor = new Factor( new ReItem( @lex_sqopen_pos, - $reg_or_data->reOrBlock, ReItem::OrBlock ) ); - } - - ragel::factor :NegOrBlock - { - $$->factor = new Factor( new ReItem( @lex_sqopen_neg, - $reg_or_data->reOrBlock, ReItem::NegOrBlock ) ); - } - - ragel::factor :Nfa - { - /* push, pop, init, stay, repeat, exit */ - $$->factor = new Factor( @1, pd->nextRepId++, $expression->expr, - $Push->action, $Pop->action, $Init->action, $Stay->action, - $Repeat->action, $Exit->action, Factor::NfaRep ); - $$->factor->mode = $1->mode; - } - - ragel::factor :NfaWrap - { - /* push, pop, init, stay, repeat, exit */ - $$->factor = new Factor( @1, pd->nextRepId++, $expression->expr, - $Push->action, $Pop->action, $Init->action, $Stay->action, - 0, $Exit->action, Factor::NfaWrap ); - $$->factor->mode = $1->mode; - } - - ragel::factor :Cond - { - /* init, inc, min, opt-max. 
*/ - $$->factor = new Factor( @2, pd->nextRepId++, $expression->expr, - $Init->action, $Inc->action, $Min->action, $OptMax->action, 0, 0, $1->type ); - } - - ragel::factor :Regex - { - bool caseInsensitive = false; - checkLitOptions( pd->id, @re_close, $re_close->data, $re_close->length, caseInsensitive ); - if ( caseInsensitive ) - $2->regExpr->caseInsensitive = true; - $$->factor = new Factor( $regex->regExpr ); - } - - # def regex - # [reg_item_rep_list] :List - ragel::regex - { - RegExpr *regExpr; - } - - ragel::regex :List - { - $$->regExpr = $reg_item_rep_list->regExpr; - } - - # def reg_item_rep_list - # [reg_item_rep_list reg_item_rep] :Rec - # | [] :Base - ragel::reg_item_rep_list - { - RegExpr *regExpr; - } - ragel::reg_item_rep_list :Rec - { - $$->regExpr = new RegExpr( $_reg_item_rep_list->regExpr, - $reg_item_rep->reItem ); - } - ragel::reg_item_rep_list :Base - { - $$->regExpr = new RegExpr(); - } - - # def reg_item_rep - # [reg_item re_star] :Star - # | [reg_item] :Base - ragel::reg_item_rep - { - ReItem *reItem; - } - - ragel::reg_item_rep :Star - { - $$->reItem = $reg_item->reItem; - $$->reItem->star = true; - } - - ragel::reg_item_rep :Base - { - $$->reItem = $reg_item->reItem; - } - - # def reg_item - # [re_sqopen_pos reg_or_data re_or_sqclose] :PosOrBlock - # | [re_sqopen_neg reg_or_data re_or_sqclose] :NegOrBlock - # | [re_dot] :Dot - # | [re_char] :Char - ragel::reg_item - { - ReItem *reItem; - } - ragel::reg_item :PosOrBlock - { - $$->reItem = new ReItem( @1, $reg_or_data->reOrBlock, ReItem::OrBlock ); - } - ragel::reg_item :NegOrBlock - { - $$->reItem = new ReItem( @1, $reg_or_data->reOrBlock, ReItem::NegOrBlock ); - } - ragel::reg_item :Dot - { - $$->reItem = new ReItem( @1, ReItem::Dot ); - } - ragel::reg_item :Char - { - string data( $re_char->data, $re_char->length ); - char *c = unescape( data.c_str() ); - $$->reItem = new ReItem( @re_char, c, strlen(c) ); - delete[] c; - } - - # def reg_or_data - # [reg_or_data reg_or_char] :Data - # | 
[] :Base - ragel::reg_or_data - { - ReOrBlock *reOrBlock; - } - - ragel::reg_or_data :Data - { - /* An optimization to lessen the tree size. If an or char is directly - * under the left side on the right and the right side is another or - * char then paste them together and return the left side. Otherwise - * just put the two under a new or data node. */ - if ( $reg_or_char->reOrItem->type == ReOrItem::Data && - $_reg_or_data->reOrBlock->type == ReOrBlock::RecurseItem && - $_reg_or_data->reOrBlock->item->type == ReOrItem::Data ) - { - /* Append the right side to right side of the left and toss the - * right side. */ - $_reg_or_data->reOrBlock->item->data.append( $reg_or_char->reOrItem->data ); - delete $reg_or_char->reOrItem; - $$->reOrBlock = $_reg_or_data->reOrBlock; - } - else { - /* Can't optimize, put the left and right under a new node. */ - $$->reOrBlock = new ReOrBlock( $_reg_or_data->reOrBlock, $reg_or_char->reOrItem ); - } - } - - ragel::reg_or_data :Base - { - $$->reOrBlock = new ReOrBlock(); - } - - # def reg_or_char - # [re_or_char] :Char - # | [Low: re_or_char re_or_dash High: re_or_char] :Range - ragel::reg_or_char - { - ReOrItem *reOrItem; - } - - ragel::reg_or_char :Char - { - // ReOrItem *reOrItem; - char *c = unescape( $re_or_char->data, $re_or_char->length ); - $$->reOrItem = new ReOrItem( @re_or_char, c, 1 ); - delete[] c; - } - - ragel::reg_or_char :Range - { - // ReOrItem *reOrItem; - char *low = unescape( $Low->data, $Low->length ); - char *high = unescape( $High->data, $High->length ); - $$->reOrItem = new ReOrItem( @re_or_dash, low[0], high[0] ); - delete[] low; - delete[] high; - } - - - # def alphabet_num - # [uint] :Uint - # | [`- uint] :Neg - # | [hex] :Hex - ragel::alphabet_num - { - bool neg; - RedToken tok; - } - - ragel::alphabet_num :Uint - { - $$->neg = false; - $$->tok.set( $uint, @uint ); - } - - ragel::alphabet_num :Neg - { - $$->neg = true; - $$->tok.set( $uint, @1 ); - } - - ragel::alphabet_num :Hex - { - $$->neg = false; - 
$$->tok.set( $hex, @hex ); - } - - # def range_lit - # [string] :String - # | [alphabet_num] :AN - ragel::range_lit - { - Literal *literal; - } - - ragel::range_lit :String - { - /* Range literals must have only one char. We restrict this in the - * parse tree. */ - $$->literal = new Literal( @string, false, - $string->data, $string->length, Literal::LitString ); - } - - ragel::range_lit :AN - { - $$->literal = new Literal( $alphabet_num->tok.loc, - $alphabet_num->neg, $alphabet_num->tok.data, - $alphabet_num->tok.length, Literal::Number ); - } - - # def lm - # [join] :Join - # | [`|* lm_stmt_list `*|] :Lm - ragel::lm - { - MachineDef *machineDef; - } - - ragel::lm :Join - { - $$->machineDef = new MachineDef( $join->join ); - } - - ragel::lm :Lm - { - /* Create a new factor going to a longest match structure. Record in - * the parse data that we have a longest match. */ - LongestMatch *lm = new LongestMatch( @1, $lm_stmt_list->lmPartList ); - pd->lmList.append( lm ); - for ( LmPartList::Iter lmp = *$lm_stmt_list->lmPartList; lmp.lte(); lmp++ ) - lmp->longestMatch = lm; - $$->machineDef = new MachineDef( lm ); - } - - ragel::lm :LmNfa - { - /* Create a new factor going to a longest match structure. Record in - * the parse data that we have a longest match. 
*/ - LongestMatch *lm = new LongestMatch( @1, $lm_stmt_list->lmPartList ); - lm->nfaConstruction = true; - pd->lmList.append( lm ); - for ( LmPartList::Iter lmp = *$lm_stmt_list->lmPartList; lmp.lte(); lmp++ ) - lmp->longestMatch = lm; - $$->machineDef = new MachineDef( lm ); - } - - # def lm_stmt_list - # [lm_stmt_list lm_stmt] :Rec - # | [lm_stmt] :Base - ragel::lm_stmt_list - { - LmPartList *lmPartList; - } - ragel::lm_stmt_list :Rec - { - $$->lmPartList = $_lm_stmt_list->lmPartList; - if ( $lm_stmt->lmPart != 0 ) - $$->lmPartList->append( $lm_stmt->lmPart ); - } - ragel::lm_stmt_list :Base - { - $$->lmPartList = new LmPartList; - if ( $lm_stmt->lmPart != 0 ) - $$->lmPartList->append( $lm_stmt->lmPart ); - } - - - # def lm_stmt - # [join opt_lm_act `;] :LmStmt commit - # | [assignment] :Assignment - # | [action_spec] :ActionSpec - - ragel::lm_stmt - { - LongestMatchPart *lmPart; - } - - ragel::lm_stmt :LmStmt - { - InputLoc loc; - loc.line = 1; - loc.fileName = 0; - - Join *join = $join->join; - Action *action = $opt_lm_act->action; - - if ( action != 0 ) - action->isLmAction = true; - - /* Provide a location to join. Unfortunately We don't - * have the start of the join as in other occurances. Use the end. */ - join->loc = loc; - - $$->lmPart = new LongestMatchPart( join, action, - loc, pd->nextLongestMatchId++ ); - } - - ragel::lm_stmt :Assignment - { - $$->lmPart = 0; - } - - ragel::lm_stmt :ActionSpec - { - $$->lmPart = 0; - } - - # def opt_lm_act - # [lm_act] :Act - # | [] :Empty - ragel::opt_lm_act - { - Action *action; - } - - ragel::opt_lm_act :Act - { - $$->action = $lm_act->action; - } - - ragel::opt_lm_act :Empty - { - $$->action = 0; - } - - # def lm_act - # [`=> action_ref] :ActionRef - # | [action_block] :ActionBlock - ragel::lm_act - { - Action *action; - } - ragel::lm_act :ActionRef - { - $$->action = $action_ref->action; - } - ragel::lm_act :ActionBlock - { - /* Create the action, add it to the list and pass up. 
*/ - Action *newAction = new Action( &$action_block->loc, std::string(), - $action_block->inlineList, pd->fsmCtx->nextCondId++ ); - pd->fsmCtx->actionList.append( newAction ); - $$->action = newAction; - } - - # def opt_export - # [`export] :Export - # | [] :Base - ragel::opt_export - { - bool isSet; - } - - ragel::opt_export :Export - { - $$->isSet = true; - } - - ragel::opt_export :Base - { - $$->isSet = false; - } - - - # def nfa_expr - # [nfa_expr `| term] :Union - # | [term] :Base - ragel::nfa_expr - { - NfaUnion *nfaUnion; - } - - ragel::nfa_expr :Union - { - $$->nfaUnion = $_nfa_expr->nfaUnion; - $$->nfaUnion->terms.append( $term->term ); - } - - ragel::nfa_expr :Base - { - $$->nfaUnion = new NfaUnion(); - $$->nfaUnion->terms.append( $term->term ); - } - - # def nfa_round_spec - # [uint `, uint] - ragel::nfa_round_spec - { - long depth; - long group; - } - - ragel::nfa_round_spec :Spec - { - // Convert the priority number to a long. Check for overflow. - errno = 0; - $$->depth = strtol( $Depth->data, 0, 10 ); - if ( $$->depth == LONG_MAX && errno == ERANGE ) - pd->id->error(@Depth) << "rounds " << $Depth->data << " overflows" << endl; - - $$->group = strtol( $Group->data, 0, 10 ); - if ( $$->group == LONG_MAX && errno == ERANGE ) - pd->id->error(@Groups) << "group " << $Group->data << " overflows" << endl; - } - - # def nfa_round_list - # [nfa_round_list `, nfa_round_spec] :Recurse - # | [nfa_round_spec] :Base - ragel::nfa_round_list - { - NfaRoundVect *roundsList; - } - - ragel::nfa_round_list :Recurse - { - $$->roundsList = $_nfa_round_list->roundsList; - $$->roundsList->append( NfaRound( $nfa_round_spec->depth, - $nfa_round_spec->group ) ); - } - - ragel::nfa_round_list :Base - { - $$->roundsList = new NfaRoundVect; - $$->roundsList->append( NfaRound( $nfa_round_spec->depth, - $nfa_round_spec->group ) ); - } - - # def nfa_rounds - # [`( nfa_round_list `)] :Rounds - ragel::nfa_rounds - { - NfaRoundVect *roundsList; - } - - ragel::nfa_rounds :Rounds - { - 
$$->roundsList = $nfa_round_list->roundsList; - } - - - ragel::write_arg :Word - { - string arg( $word->data, $word->length ); - writeArgs.push_back( arg ); - } -end - -reduction TopLevel - # def inline_block - # [block_item_list] :List - inline::inline_block - { - InlineList *inlineList; - } - - inline::inline_block :List - { - $$->inlineList = $block_item_list->inlineList; - } - - # def block_item_list - # [block_item block_item_list] :Rec - # | [] :Base - inline::block_item_list - { - InlineList *inlineList; - } - - inline::block_item_list :Rec - { - $$->inlineList = $_block_item_list->inlineList; - - if ( $block_item->inlineItem != 0 ) - $$->inlineList->prepend( $block_item->inlineItem ); - else if ( $block_item->inlineList != 0 ) { - $$->inlineList->prepend( *$block_item->inlineList ); - delete $block_item->inlineList; - } - } - - inline::block_item_list :Base - { - $$->inlineList = new InlineList; - } - - # def block_item - # [expr_any] :ExprAny - # | [block_symbol] :BlockSymbol - # | [block_interpret] :BlockInterpret - # | [`{ inline_block `}] :RecBlock - inline::block_item - { - InlineItem *inlineItem; - InlineList *inlineList; - } - - inline::block_item :ExprAny - { - $$->inlineItem = $expr_any->inlineItem; - } - - inline::block_item :BlockSymbol - { - $$->inlineItem = $block_symbol->inlineItem; - } - - inline::block_item :BlockInterpret - { - $$->inlineItem = $block_interpret->inlineItem; - } - - inline::block_item :RecBlock - { - $$->inlineList = $inline_block->inlineList; - $$->inlineList->prepend( new InlineItem( @1, "{", InlineItem::Text ) ); - $$->inlineList->append( new InlineItem( @1, "}", InlineItem::Text ) ); - $$->inlineItem = 0; - } - - # def expr_any - # [whitespace] :WS - #| [comment] :Comment - #| [string] :String - #| [number] :Number - #| [hex_number] :Hex - #| [ident] :Ident - #| [c_any] :Any - inline::expr_any - { - InlineItem *inlineItem; - } - - inline::expr_any :WS - { - string data( $whitespace->data, $whitespace->length ); - 
$$->inlineItem = new InlineItem( @whitespace, data, InlineItem::Text ); - } - - inline::expr_any :Comment - { - string data( $comment->data, $comment->length ); - $$->inlineItem = new InlineItem( @comment, data, InlineItem::Text ); - } - - inline::expr_any :String - { - string data( $string->data, $string->length ); - $$->inlineItem = new InlineItem( @string, data, InlineItem::Text ); - } - - inline::expr_any :Number - { - string data( $number->data, $number->length ); - $$->inlineItem = new InlineItem( @number, data, InlineItem::Text ); - } - - inline::expr_any :Hex - { - string data( $hex_number->data, $hex_number->length ); - $$->inlineItem = new InlineItem( @hex_number, data, InlineItem::Text ); - } - - inline::expr_any :Ident - { - string data( $ident->data, $ident->length ); - $$->inlineItem = new InlineItem( @ident, data, InlineItem::Text ); - } - - inline::expr_any :Any - { - string data( $c_any->data, $c_any->length ); - $$->inlineItem = new InlineItem( @c_any, data, InlineItem::Text ); - } - - # def block_symbol - # [`,] :B1 | [`;] :B2 | [`(] :B3 | [`)] :B4 | [`*] :B5 | [`::] :B6 - inline::block_symbol - { - InlineItem *inlineItem; - } - - inline::block_symbol :B1 - { - string data( $1->data, $1->length ); - $$->inlineItem = new InlineItem( @1, data, InlineItem::Text ); - } - - inline::block_symbol :B2 - { - string data( $1->data, $1->length ); - $$->inlineItem = new InlineItem( @1, data, InlineItem::Text ); - } - - inline::block_symbol :B3 - { - string data( $1->data, $1->length ); - $$->inlineItem = new InlineItem( @1, data, InlineItem::Text ); - } - - inline::block_symbol :B4 - { - string data( $1->data, $1->length ); - $$->inlineItem = new InlineItem( @1, data, InlineItem::Text ); - } - - inline::block_symbol :B5 - { - string data( $1->data, $1->length ); - $$->inlineItem = new InlineItem( @1, data, InlineItem::Text ); - } - - inline::block_symbol :B6 - { - string data( $1->data, $1->length ); - $$->inlineItem = new InlineItem( @1, data, 
InlineItem::Text ); - } - - # def block_interpret - # [expr_interpret] :ExprInterpret - # | [`fhold whitespace? `;] :Fhold - # | [`fgoto whitespace? `* inline_expr `;] :FgotoExpr - # | [`fnext whitespace? `* inline_expr `;] :FnextExpr - # | [`fcall whitespace? `* inline_expr `;] :FcallExpr - # | [`fncall whitespace? `* inline_expr `;] :FncallExpr - # | [`fexec inline_expr `;] :Fexec - # | [`fgoto state_ref srlex::`;] :FgotoSr - # | [`fnext state_ref srlex::`;] :FnextSr - # | [`fcall state_ref srlex::`;] :FcallSr - # | [`fncall state_ref srlex::`;] :FncallSr - # | [`fret `;] :Fret - # | [`fnret `;] :Fnret - # | [`fbreak `;] :Fbreak - # | [`fnbreak `;] :Fnbreak - inline::block_interpret - { - InlineItem *inlineItem; - } - - inline::block_interpret :Fhold - { - $$->inlineItem = new InlineItem( @1, InlineItem::Hold ); - } - inline::block_interpret :FgotoExpr - { - $$->inlineItem = new InlineItem( @1, InlineItem::GotoExpr ); - $$->inlineItem->children = $inline_expr->inlineList; - } - inline::block_interpret :FnextExpr - { - $$->inlineItem = new InlineItem( @1, InlineItem::NextExpr ); - $$->inlineItem->children = $inline_expr->inlineList; - } - inline::block_interpret :FcallExpr - { - $$->inlineItem = new InlineItem( @1, InlineItem::CallExpr ); - $$->inlineItem->children = $inline_expr->inlineList; - } - inline::block_interpret :FncallExpr - { - $$->inlineItem = new InlineItem( @1, InlineItem::NcallExpr ); - $$->inlineItem->children = $inline_expr->inlineList; - } - inline::block_interpret :Fexec - { - $$->inlineItem = new InlineItem( @1, InlineItem::Exec ); - $$->inlineItem->children = $inline_expr->inlineList; - } - inline::block_interpret :FgotoSr - { - $$->inlineItem = new InlineItem( @1, - $state_ref->nameRef, InlineItem::Goto ); - } - inline::block_interpret :FnextSr - { - $$->inlineItem = new InlineItem( @1, - $state_ref->nameRef, InlineItem::Next ); - } - inline::block_interpret :FcallSr - { - $$->inlineItem = new InlineItem( @1, - $state_ref->nameRef, 
InlineItem::Call ); - } - inline::block_interpret :FncallSr - { - $$->inlineItem = new InlineItem( @1, - $state_ref->nameRef, InlineItem::Ncall ); - } - inline::block_interpret :Fret - { - $$->inlineItem = new InlineItem( @1, InlineItem::Ret ); - } - inline::block_interpret :Fnret - { - $$->inlineItem = new InlineItem( @1, InlineItem::Nret ); - } - inline::block_interpret :Fbreak - { - $$->inlineItem = new InlineItem( @1, InlineItem::Break ); - } - inline::block_interpret :Fnbreak - { - $$->inlineItem = new InlineItem( @1, InlineItem::Nbreak ); - } - - inline::block_interpret :ExprInterpret - { - $$->inlineItem = $expr_interpret->inlineItem; - } - - # def inline_expr - # [expr_item_list] :List - inline::inline_expr - { - InlineList *inlineList; - } - - inline::inline_expr :List - { - $$->inlineList = $expr_item_list->inlineList; - } - - # def expr_item_list - # [expr_item_list expr_item] :Rec - # | [] :Empty - inline::expr_item_list - { - InlineList *inlineList; - } - - inline::expr_item_list :Rec - { - $$->inlineList = $_expr_item_list->inlineList; - $$->inlineList->append( $expr_item->inlineItem ); - } - - inline::expr_item_list :Empty - { - $$->inlineList = new InlineList; - } - - # def expr_item - # [expr_any] :ExprAny - # | [expr_symbol] :ExprSymbol - # | [expr_interpret] :ExprInterpret - inline::expr_item - { - InlineItem *inlineItem; - } - - inline::expr_item :ExprAny - { - $$->inlineItem = $expr_any->inlineItem; - } - inline::expr_item :ExprSymbol - { - string sym( $expr_symbol->sym ); - $$->inlineItem = new InlineItem( &$expr_symbol->loc, sym, InlineItem::Text ); - } - inline::expr_item :ExprInterpret - { - $$->inlineItem = $expr_interpret->inlineItem; - } - - # def expr_symbol - # [`,] | [`(] | [`)] | [`*] | [`::] - inline::expr_symbol - { - const char *sym; - colm_location loc; - } - - inline::expr_symbol :Comma - { $$->loc = *@1; $$->sym = ","; } - inline::expr_symbol :Open - { $$->loc = *@1; $$->sym = "("; } - inline::expr_symbol :Close - { $$->loc = 
*@1; $$->sym = ")"; } - inline::expr_symbol :Star - { $$->loc = *@1; $$->sym = "*"; } - inline::expr_symbol :DoubleColon - { $$->loc = *@1; $$->sym = "::"; } - - - # def expr_interpret - # [`fpc] :Fpc - # | [`fc] :Fc - # | [`fcurs] :Fcurs - # | [`ftargs] :Ftargs - # | [`fentry `( state_ref srlex::`)] :Fentry - # | [var_ref] :VarRef - inline::expr_interpret - { - InlineItem *inlineItem; - } - - inline::expr_interpret :Fpc - { - $$->inlineItem = new InlineItem( @1, InlineItem::PChar ); - } - - inline::expr_interpret :Fc - { - $$->inlineItem = new InlineItem( @1, InlineItem::Char ); - } - - inline::expr_interpret :Fcurs - { - $$->inlineItem = new InlineItem( @1, InlineItem::Curs ); - } - - inline::expr_interpret :Ftargs - { - $$->inlineItem = new InlineItem( @1, InlineItem::Targs ); - } - - inline::expr_interpret :Fentry - { - $$->inlineItem = new InlineItem( @1, $state_ref->nameRef, InlineItem::Entry ); - } - - inline::expr_interpret :VarRef - { - string data( $1->data + 1, $1->length - 1 ); - $$->inlineItem = new InlineItem( @1, InlineItem::Subst ); - - ActionParamList::Iter api = *paramList; - for ( ; api.lte(); api++ ) { - if ( (*api)->name == data ) - break; - } - - if ( api.end() ) - pd->id->error( @1 ) << "invalid parameter reference \"$" << $1->data << "\"" << endl; - else { - $$->inlineItem->substPos = api.pos(); - } - } - - host::section :MultiLine - { - if ( !isImport && includeDepth == 0 ) { - InputItem *inputItem = new InputItem; - inputItem->type = InputItem::EndSection; - inputItem->loc = @5; - id->inputItems.append( inputItem ); - - if ( section != 0 ) { - inputItem->section = section; - section->lastReference = inputItem; - } - - /* The end section may include a newline on the end, so - * we use the last line, which will count the newline. */ - inputItem = new InputItem; - inputItem->type = InputItem::HostData; - inputItem->loc.fileName = 0; - - id->inputItems.append( inputItem ); - - /* Record the parse data and move over the end section. 
*/ - id->curItem = id->curItem->next; - id->curItem->pd = pd; - - /* Move over the host data. */ - id->curItem = id->curItem->next; - } - } - - host::section :Token - { - if ( !isImport && includeDepth == 0 ) { - if ( id->curItem->loc.fileName == 0 ) - id->curItem->loc = @1; - - head_t *head = tree_to_str( prg, sp, $*1, false, false ); - id->curItem->data.write( head->data, head->length ); - } - } -end diff --git a/ragel/rlscan.rl b/ragel/rlscan.rl deleted file mode 100644 index f745b9a0..00000000 --- a/ragel/rlscan.rl +++ /dev/null @@ -1,1193 +0,0 @@ -/* - * Copyright 2006-2007 Adrian Thurston - * Copyright 2011 Josef Goettgens - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include -#include -#include - -#include "ragel.h" -#include "rlscan.h" -#include "inputdata.h" - -//#define LOG_TOKENS - -using std::ifstream; -using std::istream; -using std::ostream; -using std::endl; - -enum InlineBlockType -{ - CurlyDelimited, - SemiTerminated -}; - -char *newTokdata( int toklen ) -{ - char *tokdata = new char[sizeof(TokHead) + toklen + 1]; - return tokdata + sizeof(TokHead); -} - -void deleteTokdata( char *tokdata ) -{ - if ( tokdata ) - delete[] ( tokdata - sizeof(TokHead) ); -} - -void linkTokdata( Parser6 *parser, char *tokdata ) -{ - TokHead *head = (TokHead*)( tokdata - sizeof(TokHead) ); - head->next = parser->tokHead; - parser->tokHead = head; -} - -void clearTokdata( Parser6 *parser ) -{ - while ( parser->tokHead != 0 ) { - TokHead *next = parser->tokHead->next; - delete[] (char*)parser->tokHead; - parser->tokHead = next; - } -} - -/* - * The Scanner for Importing - */ - -%%{ - machine inline_token_scan; - alphtype int; - access tok_; - - # Import scanner tokens. - import "rlparse.h"; - - main := |* - # Define of number. - IMP_Define IMP_Word IMP_UInt => { - int base = tok_ts - token_data; - int nameOff = 1; - int numOff = 2; - - directToParser( inclToParser, fileName, line, column, TK_Word, - token_strings[base+nameOff], token_lens[base+nameOff] ); - directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); - directToParser( inclToParser, fileName, line, column, TK_UInt, - token_strings[base+numOff], token_lens[base+numOff] ); - directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); - }; - - # Assignment of number. 
- IMP_Word '=' IMP_UInt => { - int base = tok_ts - token_data; - int nameOff = 0; - int numOff = 2; - - directToParser( inclToParser, fileName, line, column, TK_Word, - token_strings[base+nameOff], token_lens[base+nameOff] ); - directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); - directToParser( inclToParser, fileName, line, column, TK_UInt, - token_strings[base+numOff], token_lens[base+numOff] ); - directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); - }; - - # Define of literal. - IMP_Define IMP_Word IMP_Literal => { - int base = tok_ts - token_data; - int nameOff = 1; - int litOff = 2; - - directToParser( inclToParser, fileName, line, column, TK_Word, - token_strings[base+nameOff], token_lens[base+nameOff] ); - directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); - directToParser( inclToParser, fileName, line, column, TK_Literal, - token_strings[base+litOff], token_lens[base+litOff] ); - directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); - }; - - # Assignment of literal. - IMP_Word '=' IMP_Literal => { - int base = tok_ts - token_data; - int nameOff = 0; - int litOff = 2; - - directToParser( inclToParser, fileName, line, column, TK_Word, - token_strings[base+nameOff], token_lens[base+nameOff] ); - directToParser( inclToParser, fileName, line, column, '=', 0, 0 ); - directToParser( inclToParser, fileName, line, column, TK_Literal, - token_strings[base+litOff], token_lens[base+litOff] ); - directToParser( inclToParser, fileName, line, column, ';', 0, 0 ); - }; - - # Catch everything else. 
- any; - *|; -}%% - -%% write data; - -void Scanner::flushImport() -{ - int *p = token_data; - int *pe = token_data + cur_token; - int *eof = 0; - - %%{ - machine inline_token_scan; - write init; - write exec; - }%% - - if ( tok_ts == 0 ) - cur_token = 0; - else { - cur_token = pe - tok_ts; - int ts_offset = tok_ts - token_data; - memmove( token_data, token_data+ts_offset, cur_token*sizeof(token_data[0]) ); - memmove( token_strings, token_strings+ts_offset, cur_token*sizeof(token_strings[0]) ); - memmove( token_lens, token_lens+ts_offset, cur_token*sizeof(token_lens[0]) ); - } -} - -void Scanner::directToParser( Parser6 *toParser, const char *tokFileName, int tokLine, - int tokColumn, int type, char *tokdata, int toklen ) -{ - InputLoc loc; - - #ifdef LOG_TOKENS - cerr << "scanner:" << tokLine << ":" << tokColumn << - ": sending token to the parser " << Parser6_lelNames[type]; - cerr << " " << toklen; - if ( tokdata != 0 ) - cerr << " " << tokdata; - cerr << endl; - #endif - - loc.fileName = tokFileName; - loc.line = tokLine; - loc.col = tokColumn; - - toParser->token( loc, type, tokdata, toklen ); -} - -void Scanner::importToken( int token, char *start, char *end ) -{ - if ( cur_token == max_tokens ) - flushImport(); - - token_data[cur_token] = token; - if ( start == 0 ) { - token_strings[cur_token] = 0; - token_lens[cur_token] = 0; - } - else { - int toklen = end-start; - token_lens[cur_token] = toklen; - token_strings[cur_token] = new char[toklen+1]; - memcpy( token_strings[cur_token], start, toklen ); - token_strings[cur_token][toklen] = 0; - } - cur_token++; -} - -void Scanner::pass() -{ - if ( sectionPass ) - return; - - updateCol(); - - /* If no errors and we are at the bottom of the include stack (the - * source file listed on the command line) then write out the data. 
*/ - if ( includeDepth == 0 && id->machineSpec == 0 && id->machineName == 0 ) - id->curItem->data.write( ts, te-ts ); -} - -void Scanner::pass( int token, char *start, char *end ) -{ - if ( sectionPass ) - return; - - if ( importMachines ) - importToken( token, start, end ); - - pass(); -} - -/* - * The scanner for processing sections, includes, imports, etc. - */ - -%%{ - machine section_parse; - alphtype int; - write data; -}%% - -void Scanner::init( ) -{ - %% write init; -} - -bool Scanner::active() -{ - if ( ignoreSection ) - return false; - - if ( parser == 0 && ! parserExistsError ) { - id->error(scan_loc()) << "this specification has no name, nor does any previous" - " specification" << endl; - parserExistsError = true; - } - - if ( parser == 0 ) - return false; - - return true; -} - -InputLoc Scanner::scan_loc() -{ - return makeInputLoc( fileName, line, column ); -} - -void Scanner::updateCol() -{ - char *from = lastnl; - if ( from == 0 ) - from = ts; - column += te - from; - lastnl = 0; -} - -void Scanner::handleMachine() -{ - if ( sectionPass ) { - /* Assign a name to the machine. */ - char *machine = word; - - SectionDictEl *sdEl = id->sectionDict.find( machine ); - if ( sdEl == 0 ) { - sdEl = new SectionDictEl( machine ); - sdEl->value = new Section( machine ); - id->sectionDict.insert( sdEl ); - } - - section = sdEl->value; - } - else { - - /* Assign a name to the machine. */ - char *machine = word; - - if ( !importMachines && inclSectionTarg == 0 ) { - ignoreSection = false; - - ParserDictEl *pdEl = id->parserDict.find( machine ); - if ( pdEl == 0 ) { - pdEl = new ParserDictEl( machine ); - pdEl->value = new Parser6( id, fileName, machine, sectionLoc, - id->hostLang, id->minimizeLevel, id->minimizeOpt ); - pdEl->value->init(); - id->parserDict.insert( pdEl ); - id->parserList.append( pdEl->value ); - - /* Also into the parse data dict. This is the new style. 
*/ - ParseDataDictEl *pddEl = new ParseDataDictEl( machine ); - pddEl->value = pdEl->value->pd; - id->parseDataDict.insert( pddEl ); - id->parseDataList.append( pddEl->value ); - } - - parser = pdEl->value; - } - else if ( !importMachines && strcmp( inclSectionTarg, machine ) == 0 ) { - /* found include target */ - ignoreSection = false; - parser = inclToParser; - } - else { - /* ignoring section */ - ignoreSection = true; - parser = 0; - } - } -} - -void Scanner::handleInclude() -{ - if ( sectionPass ) - return; - - if ( active() ) { - char *inclSectionName = word; - const char **includeChecks = 0; - - /* Implement defaults for the input file and section name. */ - if ( inclSectionName == 0 ) - inclSectionName = parser->sectionName; - - if ( lit != 0 ) { - long length = 0; - bool caseInsensitive = false; - char *data = prepareLitString( id, InputLoc(), lit, lit_len, length, caseInsensitive ); - - includeChecks = parser->pd->id->makeIncludePathChecks( fileName, data ); - } - else { - char *test = new char[strlen(fileName)+1]; - strcpy( test, fileName ); - - includeChecks = new const char*[2]; - - includeChecks[0] = test; - includeChecks[1] = 0; - } - - long found = 0; - ifstream *inFile = parser->pd->id->tryOpenInclude( includeChecks, found ); - if ( inFile == 0 ) { - id->error(scan_loc()) << "include: failed to locate file" << endl; - const char **tried = includeChecks; - while ( *tried != 0 ) - id->error(scan_loc()) << "include: attempted: \"" << *tried++ << '\"' << endl; - } - else { - /* Don't include anything that's already been included. 
*/ - if ( !parser->pd->duplicateInclude( includeChecks[found], inclSectionName ) ) { - parser->pd->includeHistory.push_back( IncludeHistoryItem( - includeChecks[found], inclSectionName ) ); - - Scanner scanner( id, includeChecks[found], *inFile, parser, - inclSectionName, includeDepth+1, false ); - scanner.do_scan( ); - } - - delete inFile; - } - } -} - -void Scanner::handleImport() -{ - if ( sectionPass ) - return; - - if ( active() ) { - long length = 0; - bool caseInsensitive = false; - char *data = prepareLitString( id, InputLoc(), lit, lit_len, length, caseInsensitive ); - - const char **importChecks = parser->pd->id->makeIncludePathChecks( fileName, data ); - - /* Open the input file for reading. */ - long found = 0; - ifstream *inFile = parser->pd->id->tryOpenInclude( importChecks, found ); - if ( inFile == 0 ) { - id->error(scan_loc()) << "import: could not open import file " << - "for reading" << endl; - const char **tried = importChecks; - while ( *tried != 0 ) - id->error(scan_loc()) << "import: attempted: \"" << *tried++ << '\"' << endl; - } - - Scanner scanner( id, importChecks[found], *inFile, parser, - 0, includeDepth+1, true ); - scanner.do_scan( ); - scanner.importToken( 0, 0, 0 ); - scanner.flushImport(); - delete inFile; - } -} - -%%{ - machine section_parse; - - # Need the defines representing tokens. 
- import "rlparse.h"; - - action clear_words { word = lit = 0; word_len = lit_len = 0; } - action store_word { word = tokdata; word_len = toklen; } - action store_lit { lit = tokdata; lit_len = toklen; } - - action mach_err { id->error(scan_loc()) << "bad machine statement" << endl; } - action incl_err { id->error(scan_loc()) << "bad include statement" << endl; } - action import_err { id->error(scan_loc()) << "bad import statement" << endl; } - action write_err { id->error(scan_loc()) << "bad write statement" << endl; } - - action handle_machine { handleMachine(); } - action handle_include { handleInclude(); } - action handle_import { handleImport(); } - - machine_stmt = - ( KW_Machine TK_Word @store_word ';' ) @handle_machine - <>err mach_err <>eof mach_err; - - include_names = ( - TK_Word @store_word ( TK_Literal @store_lit )? | - TK_Literal @store_lit - ) >clear_words; - - include_stmt = - ( KW_Include include_names ';' ) @handle_include - <>err incl_err <>eof incl_err; - - import_stmt = - ( KW_Import TK_Literal @store_lit ';' ) @handle_import - <>err import_err <>eof import_err; - - action write_command - { - if ( sectionPass ) { - InputItem *inputItem = new InputItem; - inputItem->type = InputItem::Write; - inputItem->loc.fileName = fileName; - inputItem->loc.line = line; - inputItem->loc.col = column; - inputItem->name = section->sectionName; - inputItem->section = section; - - /* Track the last reference. 
*/ - inputItem->section->lastReference = inputItem; - - id->inputItems.append( inputItem ); - } - else { - if ( includeDepth == 0 && active() && - id->machineSpec == 0 && id->machineName == 0 ) - { - id->curItem = id->curItem->next; - id->curItem->pd = parser->pd; - id->curItem->parser = parser; - id->checkLastRef( id->curItem ); - } - } - } - - action write_arg - { - if ( sectionPass ) { - } - else { - if ( active() && id->machineSpec == 0 && id->machineName == 0 ) - id->curItem->writeArgs.push_back( strdup(tokdata) ); - } - } - - action write_close - { - if ( sectionPass ) { - } - else { - /* if ( active() && id->machineSpec == 0 && id->machineName == 0 ) - * id->curItem->writeArgs.append( 0 ); */ - } - } - - write_stmt = - ( KW_Write @write_command - ( TK_Word @write_arg )+ ';' @write_close ) - <>err write_err <>eof write_err; - - action handle_token - { - if ( sectionPass ) { - deleteTokdata( tokdata ); - } - else { - /* Send the token off to the parser. */ - if ( active() ) { - if ( tokdata != 0 ) { - linkTokdata( parser, tokdata ); - } - - directToParser( parser, fileName, line, column, type, tokdata, toklen ); - } - else { - deleteTokdata( tokdata ); - } - } - } - - # Catch everything else. 
- everything_else = - ^( KW_Machine | KW_Include | KW_Import | KW_Write ) @handle_token; - - main := ( - machine_stmt | - include_stmt | - import_stmt | - write_stmt | - everything_else - )*; -}%% - -void Scanner::token( int type, char c ) -{ - token( type, &c, &c + 1 ); -} - -void Scanner::token( int type ) -{ - token( type, 0, 0 ); -} - - -void Scanner::token( int type, char *start, char *end ) -{ - char *tokdata = 0; - int toklen = 0; - if ( start != 0 ) { - toklen = end-start; - tokdata = newTokdata( toklen + 1 ); - memcpy( tokdata, start, toklen ); - tokdata[toklen] = 0; - } - - processToken( type, tokdata, toklen ); -} - -void Scanner::processToken( int type, char *tokdata, int toklen ) -{ - int *p, *pe, *eof; - - if ( type < 0 ) - p = pe = eof = 0; - else { - p = &type; - pe = &type + 1; - eof = 0; - } - - %%{ - machine section_parse; - write exec; - }%% - - updateCol(); - - /* Record the last token for use in controlling the scan of subsequent - * tokens. */ - lastToken = type; -} - -void Scanner::startSection( ) -{ - parserExistsError = false; - - sectionLoc.fileName = fileName; - sectionLoc.line = line; - sectionLoc.col = column; -} - -void Scanner::endSection( ) -{ - /* Execute the eof actions for the section parser. */ - processToken( -1, 0, 0 ); - - if ( sectionPass ) { - InputItem *inputItem = new InputItem; - inputItem->type = InputItem::EndSection; - inputItem->loc.fileName = fileName; - inputItem->loc.line = line; - inputItem->loc.col = column; - id->inputItems.append( inputItem ); - if ( section != 0 ) { - inputItem->section = section; - section->lastReference = inputItem; - } - - if ( includeDepth == 0 ) { - if ( id->machineSpec == 0 && id->machineName == 0 ) { - /* The end section may include a newline on the end, so - * we use the last line, which will count the newline. 
*/ - InputItem *inputItem = new InputItem; - inputItem->type = InputItem::HostData; - inputItem->loc.fileName = fileName; - inputItem->loc.line = line; - inputItem->loc.col = column; - id->inputItems.append( inputItem ); - } - } - } - else { - /* Close off the section with the parser. */ - if ( includeDepth == 0 && active() ) { - InputLoc loc; - loc.fileName = fileName; - loc.line = line; - loc.col = column; - - parser->token( loc, TK_EndSection, 0, 0 ); - - id->curItem = id->curItem->next; - - if ( parser != 0 ) { - id->curItem->pd = parser->pd; - id->curItem->parser = parser; - } - - id->checkLastRef( id->curItem ); - } - - if ( includeDepth == 0 ) { - if ( id->machineSpec == 0 && id->machineName == 0 ) { - id->curItem = id->curItem->next; - id->checkLastRef( id->curItem ); - } - } - } -} - -%%{ - machine rlscan; - - # This is sent by the driver code. - EOF = 0; - - action inc_nl { - lastnl = p; - column = 0; - line++; - } - NL = '\n' @inc_nl; - - # Identifiers, numbers, commetns, and other common things. - ident = ( alpha | '_' ) ( alpha |digit |'_' )*; - ocaml_ident = ( alpha | '_' ) ( alpha |digit |'_' )* "'"?; - number = digit+; - hex_number = '0x' [0-9a-fA-F]+; - - c_comment = - '/*' ( any | NL )* :>> '*/'; - - cpp_comment = - '//' [^\n]* NL; - - c_cpp_comment = c_comment | cpp_comment; - - ruby_comment = '#' [^\n]* NL; - - # These literal forms are common to host code and ragel. - s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'"; - d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"'; - host_re_literal = '/' ([^/\\] | NL | '\\' (any | NL))* '/'; - - whitespace = [ \t] | NL; - pound_comment = '#' [^\n]* NL; - - # An inline block of code for languages other than Ruby. - inline_code := |* - # Inline expression keywords. 
- "fpc" => { token( KW_PChar ); }; - "fc" => { token( KW_Char ); }; - "fcurs" => { token( KW_CurState ); }; - "ftargs" => { token( KW_TargState ); }; - "fentry" => { - whitespaceOn = false; - token( KW_Entry ); - }; - - # Inline statement keywords. - "fhold" => { - whitespaceOn = false; - token( KW_Hold ); - }; - "fexec" => { token( KW_Exec, 0, 0 ); }; - "fgoto" => { - whitespaceOn = false; - token( KW_Goto ); - }; - "fnext" => { - whitespaceOn = false; - token( KW_Next ); - }; - "fcall" => { - whitespaceOn = false; - token( KW_Call ); - }; - "fret" => { - whitespaceOn = false; - token( KW_Ret ); - }; - "fbreak" => { - whitespaceOn = false; - token( KW_Break ); - }; - "fncall" => { - whitespaceOn = false; - token( KW_Ncall ); - }; - "fnret" => { - whitespaceOn = false; - token( KW_Nret ); - }; - "fnbreak" => { - whitespaceOn = false; - token( KW_Nbreak ); - }; - - ident => { token( TK_Word, ts, te ); }; - - number => { token( TK_UInt, ts, te ); }; - hex_number => { token( TK_Hex, ts, te ); }; - - ( s_literal | d_literal ) - => { token( IL_Literal, ts, te ); }; - - whitespace+ => { - if ( whitespaceOn ) - token( IL_WhiteSpace, ts, te ); - }; - - c_cpp_comment => { token( IL_Comment, ts, te ); }; - - "::" => { token( TK_NameSep, ts, te ); }; - - # Some symbols need to go to the parser as with their cardinal value as - # the token type (as opposed to being sent as anonymous symbols) - # because they are part of the sequences which we interpret. The * ) ; - # symbols cause whitespace parsing to come back on. This gets turned - # off by some keywords. 
- - ";" => { - whitespaceOn = true; - token( *ts, ts, te ); - if ( inlineBlockType == SemiTerminated ) - fret; - }; - - "$" [a-zA-Z_][a-zA-Z_0-9]* => { - if ( parser != 0 && parser->parseSubstitutions ) - token( TK_SubstRef, ts+1, te ); - else { - token( IL_Symbol, ts, ts+1 ); - fexec ts+1; - } - }; - - [*)] => { - whitespaceOn = true; - token( *ts, ts, te ); - }; - - [,(] => { token( *ts, ts, te ); }; - - '{' => { - token( IL_Symbol, ts, te ); - curly_count += 1; - }; - - '}' => { - if ( --curly_count == 0 && inlineBlockType == CurlyDelimited ) { - /* Inline code block ends. */ - token( '}' ); - fret; - } - else { - /* Either a semi terminated inline block or only the closing - * brace of some inner scope, not the block's closing brace. */ - token( IL_Symbol, ts, te ); - } - }; - - EOF => { - id->error(scan_loc()) << "unterminated code block" << endl; - }; - - # Send every other character as a symbol. - any => { token( IL_Symbol, ts, te ); }; - *|; - - or_literal := |* - # Escape sequences in OR expressions. - '\\0' => { token( RE_Char, '\0' ); }; - '\\a' => { token( RE_Char, '\a' ); }; - '\\b' => { token( RE_Char, '\b' ); }; - '\\t' => { token( RE_Char, '\t' ); }; - '\\n' => { token( RE_Char, '\n' ); }; - '\\v' => { token( RE_Char, '\v' ); }; - '\\f' => { token( RE_Char, '\f' ); }; - '\\r' => { token( RE_Char, '\r' ); }; - '\\\n' => { updateCol(); }; - '\\' any => { token( RE_Char, ts+1, te ); }; - - # Range dash in an OR expression. - '-' => { token( RE_Dash, 0, 0 ); }; - - # Terminate an OR expression. - ']' => { token( RE_SqClose ); fret; }; - - EOF => { - id->error(scan_loc()) << "unterminated OR literal" << endl; - }; - - # Characters in an OR expression. - [^\]] => { token( RE_Char, ts, te ); }; - - *|; - - ragel_re_literal := |* - # Escape sequences in regular expressions. 
- '\\0' => { token( RE_Char, '\0' ); }; - '\\a' => { token( RE_Char, '\a' ); }; - '\\b' => { token( RE_Char, '\b' ); }; - '\\t' => { token( RE_Char, '\t' ); }; - '\\n' => { token( RE_Char, '\n' ); }; - '\\v' => { token( RE_Char, '\v' ); }; - '\\f' => { token( RE_Char, '\f' ); }; - '\\r' => { token( RE_Char, '\r' ); }; - '\\\n' => { updateCol(); }; - '\\' any => { token( RE_Char, ts+1, te ); }; - - # Terminate an OR expression. - '/' [i]? => { - token( RE_Slash, ts, te ); - fgoto parser_def; - }; - - # Special characters. - '.' => { token( RE_Dot ); }; - '*' => { token( RE_Star ); }; - - '[' => { token( RE_SqOpen ); fcall or_literal; }; - '[^' => { token( RE_SqOpenNeg ); fcall or_literal; }; - - EOF => { - id->error(scan_loc()) << "unterminated regular expression" << endl; - }; - - # Characters in an OR expression. - [^\/] => { token( RE_Char, ts, te ); }; - *|; - - # We need a separate token space here to avoid the ragel keywords. - write_statement := |* - ident => { token( TK_Word, ts, te ); } ; - [ \t\n]+ => { updateCol(); }; - ';' => { token( ';' ); fgoto parser_def; }; - - EOF => { - id->error(scan_loc()) << "unterminated write statement" << endl; - }; - *|; - - # Parser definitions. - parser_def := |* - #'length_cond' => { token( KW_Length ); }; - 'machine' => { token( KW_Machine ); }; - 'include' => { token( KW_Include ); }; - 'import' => { token( KW_Import ); }; - 'write' => { - token( KW_Write ); - fgoto write_statement; - }; - 'action' => { token( KW_Action ); }; - 'alphtype' => { token( KW_AlphType ); }; - 'prepush' => { token( KW_PrePush ); }; - 'postpop' => { token( KW_PostPop ); }; - - 'nfaprepush' => { token( KW_NfaPrePush ); }; - 'nfapostpop' => { token( KW_NfaPostPop ); }; - - # FIXME: Enable this post 5.17. 
- # 'range' => { token( KW_Range ); }; - - 'getkey' => { - token( KW_GetKey ); - inlineBlockType = SemiTerminated; - fcall inline_code; - }; - 'access' => { - token( KW_Access ); - inlineBlockType = SemiTerminated; - fcall inline_code; - }; - 'variable' => { - token( KW_Variable ); - inlineBlockType = SemiTerminated; - fcall inline_code; - }; - 'when' => { token( KW_When ); }; - 'inwhen' => { token( KW_InWhen ); }; - 'outwhen' => { token( KW_OutWhen ); }; - 'eof' => { token( KW_Eof ); }; - 'err' => { token( KW_Err ); }; - 'lerr' => { token( KW_Lerr ); }; - 'to' => { token( KW_To ); }; - 'from' => { token( KW_From ); }; - 'export' => { token( KW_Export ); }; - - # Identifiers. - ident => { token( TK_Word, ts, te ); } ; - - # Numbers - number => { token( TK_UInt, ts, te ); }; - hex_number => { token( TK_Hex, ts, te ); }; - - # Literals, with optionals. - ( s_literal | d_literal ) [i]? - => { token( TK_Literal, ts, te ); }; - - '[' => { token( RE_SqOpen ); fcall or_literal; }; - '[^' => { token( RE_SqOpenNeg ); fcall or_literal; }; - - '/' => { token( RE_Slash ); fgoto ragel_re_literal; }; - - # Ignore. - pound_comment => { updateCol(); }; - - ':=' => { token( TK_ColonEquals ); }; - '|=' => { token( TK_BarEquals ); }; - - # To State Actions. - ">~" => { token( TK_StartToState ); }; - "$~" => { token( TK_AllToState ); }; - "%~" => { token( TK_FinalToState ); }; - "<~" => { token( TK_NotStartToState ); }; - "@~" => { token( TK_NotFinalToState ); }; - "<>~" => { token( TK_MiddleToState ); }; - - # From State actions - ">*" => { token( TK_StartFromState ); }; - "$*" => { token( TK_AllFromState ); }; - "%*" => { token( TK_FinalFromState ); }; - "<*" => { token( TK_NotStartFromState ); }; - "@*" => { token( TK_NotFinalFromState ); }; - "<>*" => { token( TK_MiddleFromState ); }; - - # EOF Actions. 
- ">/" => { token( TK_StartEOF ); }; - "$/" => { token( TK_AllEOF ); }; - "%/" => { token( TK_FinalEOF ); }; - " { token( TK_NotStartEOF ); }; - "@/" => { token( TK_NotFinalEOF ); }; - "<>/" => { token( TK_MiddleEOF ); }; - - # Global Error actions. - ">!" => { token( TK_StartGblError ); }; - "$!" => { token( TK_AllGblError ); }; - "%!" => { token( TK_FinalGblError ); }; - " { token( TK_NotStartGblError ); }; - "@!" => { token( TK_NotFinalGblError ); }; - "<>!" => { token( TK_MiddleGblError ); }; - - # Local error actions. - ">^" => { token( TK_StartLocalError ); }; - "$^" => { token( TK_AllLocalError ); }; - "%^" => { token( TK_FinalLocalError ); }; - "<^" => { token( TK_NotStartLocalError ); }; - "@^" => { token( TK_NotFinalLocalError ); }; - "<>^" => { token( TK_MiddleLocalError ); }; - - # Middle. - "<>" => { token( TK_Middle ); }; - - # Conditions. - '>?' => { token( TK_StartCond ); }; - '$?' => { token( TK_AllCond ); }; - '%?' => { token( TK_LeavingCond ); }; - - '..' => { token( TK_DotDot ); }; - '../i' => { token( TK_DotDotIndep ); }; - - '**' => { token( TK_StarStar ); }; - '--' => { token( TK_DashDash ); }; - '->' => { token( TK_Arrow ); }; - '=>' => { token( TK_DoubleArrow ); }; - - ":>" => { token( TK_ColonGt ); }; - ":>>" => { token( TK_ColonGtGt ); }; - "<:" => { token( TK_LtColon ); }; - - ":nfa(" => { token( TK_ColonNfaOpen ); }; - ":cond(" => { token( TK_ColonCondOpen ); }; - ":condstar(" => { token( TK_ColonCondStarOpen ); }; - ":condplus(" => { token( TK_ColonCondPlusOpen ); }; - ":nomax(" => { token( TK_ColonNoMaxOpen ); }; - "):" => { token( TK_CloseColon ); }; - - # Opening of longest match. - "|*" => { token( TK_BarStar ); }; - - # Separater for name references. - "::" => { token( TK_NameSep, ts, te ); }; - - '}%%' => { - updateCol(); - endSection(); - fret; - }; - - [ \t\r]+ => { updateCol(); }; - - # If we are in a single line machine then newline may end the spec. 
- NL => { - updateCol(); - if ( singleLineSpec ) { - endSection(); - fret; - } - }; - - '{' => { - if ( lastToken == KW_Export || lastToken == KW_Entry ) - token( '{' ); - else { - token( '{' ); - curly_count = 1; - inlineBlockType = CurlyDelimited; - fcall inline_code; - } - }; - - EOF => { - id->error(scan_loc()) << "unterminated ragel section" << endl; - }; - - any => { token( *ts ); } ; - *|; - - # Outside code scanner. These tokens get passed through. - main := |* - 'define' => { pass( IMP_Define, 0, 0 ); }; - ident => { pass( IMP_Word, ts, te ); }; - number => { pass( IMP_UInt, ts, te ); }; - c_cpp_comment => { pass(); }; - ( s_literal | d_literal ) => { pass( IMP_Literal, ts, te ); }; - - '%%{' => { - updateCol(); - singleLineSpec = false; - startSection(); - fcall parser_def; - }; - '%%' => { - updateCol(); - singleLineSpec = true; - startSection(); - fcall parser_def; - }; - whitespace+ => { pass(); }; - EOF; - any => { pass( *ts, 0, 0 ); }; - *|; -}%% - -%% write data; - -void Scanner::do_scan() -{ - int bufsize = 8; - char *buf = new char[bufsize]; - int cs, act, have = 0; - int top; - - /* The stack is two deep, one level for going into ragel defs from the main - * machines which process outside code, and another for going into or literals - * from either a ragel spec, or a regular expression. */ - int stack[2]; - int curly_count = 0; - bool execute = true; - bool singleLineSpec = false; - InlineBlockType inlineBlockType = CurlyDelimited; - - line = 1; - column = 1; - lastnl = 0; - - /* Init the section parser and the character scanner. */ - init(); - %% write init; - - /* Set up the start state. FIXME: After 5.20 is released the nocs write - * init option should be used, the main machine eliminated and this statement moved - * above the write init. */ - cs = rlscan_en_main; - - while ( execute ) { - char *p = buf + have; - int space = bufsize - have; - - if ( space == 0 ) { - /* We filled up the buffer trying to scan a token. Grow it. 
*/ - bufsize = bufsize * 2; - char *newbuf = new char[bufsize]; - - /* Recompute p and space. */ - p = newbuf + have; - space = bufsize - have; - - /* Patch up pointers possibly in use. */ - if ( ts != 0 ) - ts = newbuf + ( ts - buf ); - te = newbuf + ( te - buf ); - - /* Copy the new buffer in. */ - memcpy( newbuf, buf, have ); - delete[] buf; - buf = newbuf; - } - - input.read( p, space ); - int len = input.gcount(); - char *pe = p + len; - - /* If we see eof then append the eof var. */ - char *eof = 0; - if ( len == 0 ) { - eof = pe; - execute = false; - } - - %% write exec; - - /* Check if we failed. */ - if ( cs == rlscan_error ) { - /* Machine failed before finding a token. I'm not yet sure if this - * is reachable. */ - id->error(scan_loc()) << "scanner error" << endl; - id->abortCompile( 1 ); - } - - /* Decide if we need to preserve anything. */ - char *preserve = ts; - - /* Now set up the prefix. */ - if ( preserve == 0 ) - have = 0; - else { - /* There is data that needs to be shifted over. 
*/ - have = pe - preserve; - memmove( buf, preserve, have ); - unsigned int shiftback = preserve - buf; - if ( ts != 0 ) - ts -= shiftback; - te -= shiftback; - - preserve = buf; - } - } - - delete[] buf; -} diff --git a/ragel/xml.cc b/ragel/xml.cc deleted file mode 100644 index 861bb89f..00000000 --- a/ragel/xml.cc +++ /dev/null @@ -1,786 +0,0 @@ -/* - * Copyright 2005-2018 Adrian Thurston - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * XML Output not included in 7.0 (yet -- possibly) - */ - -#include "ragel.h" -#include "xml.h" -#include "parsedata.h" -#include "fsmgraph.h" -#include "gendata.h" -#include "inputdata.h" -#include -#include "version.h" - -using std::endl; - -void InputData::processXML() -{ - /* Compiles machines. 
*/ - prepareAllMachines(); - - if ( errorCount > 0 ) - abortCompile( 1 ); - - createOutputStream(); - - if ( errorCount > 0 ) - abortCompile( 1 ); - - /* - * From this point on we should not be reporting any errors. - */ - - openOutput(); - writeXML( *outStream ); - closeOutput(); -} - -XMLCodeGen::XMLCodeGen( std::string fsmName, int machineId, FsmGbl *id, PdBase *pd, FsmAp *fsm, std::ostream &out ) -: - RedBase( id, pd, fsm, fsmName, machineId ), - out(out) -{ -} - -void XMLCodeGen::writeActionList() -{ - /* Determine which actions to write. */ - int nextActionId = 0; - for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) { - if ( act->numRefs() > 0 || act->numCondRefs > 0 ) - act->actionId = nextActionId++; - } - - /* Write the list. */ - out << " \n"; - for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) { - if ( act->actionId >= 0 ) - writeAction( act ); - } - out << " \n"; -} - -void XMLCodeGen::writeActionTableList() -{ - /* Must first order the action tables based on their id. */ - int numTables = nextActionTableId; - RedActionTable **tables = new RedActionTable*[numTables]; - for ( ActionTableMap::Iter at = actionTableMap; at.lte(); at++ ) - tables[at->id] = at; - - out << " \n"; - for ( int t = 0; t < numTables; t++ ) { - out << " key.length() << "\">"; - for ( ActionTable::Iter atel = tables[t]->key; atel.lte(); atel++ ) { - out << atel->value->actionId; - if ( ! atel.last() ) - out << " "; - } - out << "\n"; - } - out << " \n"; - - delete[] tables; -} - -void XMLCodeGen::writeKey( Key key ) -{ - if ( keyOps->isSigned ) - out << key.getVal(); - else - out << (unsigned long) key.getVal(); -} - -void XMLCodeGen::writeTrans( Key lowKey, Key highKey, TransAp *trans ) -{ - /* Write the transition. */ - out << " "; - writeKey( lowKey ); - out << " "; - writeKey( highKey ); - - if ( trans->plain() ) { - /* First reduce the action. 
*/ - RedActionTable *actionTable = 0; - if ( trans->tdap()->actionTable.length() > 0 ) - actionTable = actionTableMap.find( trans->tdap()->actionTable ); - - if ( trans->tdap()->toState != 0 ) - out << " " << trans->tdap()->toState->alg.stateNum; - else - out << " x"; - - if ( actionTable != 0 ) - out << " " << actionTable->id; - else - out << " x"; - } - else { - for ( CondList::Iter ctel = trans->tcap()->condList; ctel.lte(); ctel++ ) { - out << ""; - out << trans->tcap()->condSpace->condSpaceId; - - /* First reduce the action. */ - RedActionTable *actionTable = 0; - if ( ctel->actionTable.length() > 0 ) - actionTable = actionTableMap.find( ctel->actionTable ); - - if ( ctel->toState != 0 ) - out << " " << ctel->toState->alg.stateNum; - else - out << " x"; - - if ( actionTable != 0 ) - out << " " << actionTable->id; - else - out << " x"; - - out << ""; - } - } - - out << "\n"; -} - -void XMLCodeGen::writeTransList( StateAp *state ) -{ - TransListVect outList; - - out << " outList.length() << "\">\n"; - - /* If there is only are no ranges the task is simple. */ - if ( state->outList.length() > 0 ) { - /* Loop each source range. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - /* Reduce the transition. If it reduced to anything then add it. */ - appendTrans( outList, trans->lowKey, trans->highKey, trans ); - } - } - - for ( TransListVect::Iter tvi = outList; tvi.lte(); tvi++ ) - writeTrans( tvi->lowKey, tvi->highKey, tvi->value ); - out << " \n"; -} - -void XMLCodeGen::writeEofTrans( StateAp *state ) -{ - RedActionTable *eofActions = 0; - if ( state->eofActionTable.length() > 0 ) - eofActions = actionTableMap.find( state->eofActionTable ); - - /* The is used when there is an eof target, otherwise the eof - * action goes into state actions. 
*/ - if ( state->eofTarget != 0 ) { - out << " " << state->eofTarget->alg.stateNum; - - if ( eofActions != 0 ) - out << " " << eofActions->id; - else - out << " x"; - - out << "" << endl; - } -} - -void XMLCodeGen::writeText( InlineItem *item ) -{ - if ( item->prev == 0 || item->prev->type != InlineItem::Text ) - out << ""; - xmlEscapeHost( out, item->data.c_str(), item->data.size() ); - if ( item->next == 0 || item->next->type != InlineItem::Text ) - out << ""; -} - -void XMLCodeGen::writeGoto( InlineItem *item ) -{ - if ( pd->generatingSectionSubset ) - out << "-1"; - else { - EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id ); - out << "" << targ->value->alg.stateNum << ""; - } -} - -void XMLCodeGen::writeCall( InlineItem *item ) -{ - if ( pd->generatingSectionSubset ) - out << "-1"; - else { - EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id ); - out << "" << targ->value->alg.stateNum << ""; - } -} - -void XMLCodeGen::writeNext( InlineItem *item ) -{ - if ( pd->generatingSectionSubset ) - out << "-1"; - else { - EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id ); - out << "" << targ->value->alg.stateNum << ""; - } -} - -void XMLCodeGen::writeGotoExpr( InlineItem *item ) -{ - out << ""; - writeInlineList( item->children ); - out << ""; -} - -void XMLCodeGen::writeCallExpr( InlineItem *item ) -{ - out << ""; - writeInlineList( item->children ); - out << ""; -} - -void XMLCodeGen::writeNextExpr( InlineItem *item ) -{ - out << ""; - writeInlineList( item->children ); - out << ""; -} - -void XMLCodeGen::writeEntry( InlineItem *item ) -{ - if ( pd->generatingSectionSubset ) - out << "-1"; - else { - EntryMapEl *targ = fsm->entryPoints.find( item->nameTarg->id ); - out << "" << targ->value->alg.stateNum << ""; - } -} - -void XMLCodeGen::writeActionExec( InlineItem *item ) -{ - out << ""; - writeInlineList( item->children ); - out << ""; -} - -void XMLCodeGen::writeLmOnLast( InlineItem *item ) -{ - out << "1"; - - if ( 
item->longestMatchPart->action != 0 ) { - out << ""; - writeInlineList( item->longestMatchPart->action->inlineList ); - out << ""; - } -} - -void XMLCodeGen::writeLmOnNext( InlineItem *item ) -{ - out << "0"; - out << ""; - - if ( item->longestMatchPart->action != 0 ) { - out << ""; - writeInlineList( item->longestMatchPart->action->inlineList ); - out << ""; - } -} - -void XMLCodeGen::writeLmOnLagBehind( InlineItem *item ) -{ - out << ""; - - if ( item->longestMatchPart->action != 0 ) { - out << ""; - writeInlineList( item->longestMatchPart->action->inlineList ); - out << ""; - } -} - -void XMLCodeGen::writeLmSwitch( InlineItem *item ) -{ - LongestMatch *longestMatch = item->longestMatch; - out << "\n"; - - /* We can't put the here because we may need to handle the error - * case and in that case p should not be changed. Instead use a default - * label in the switch to adjust p when user actions are not set. An id of - * -1 indicates the default. */ - - if ( longestMatch->lmSwitchHandlesError ) { - /* If the switch handles error then we should have also forced the - * error state. */ - assert( fsm->errState != 0 ); - - out << " "; - out << "" << fsm->errState->alg.stateNum << ""; - out << "\n"; - } - - bool needDefault = false; - for ( LmPartList::Iter lmi = *longestMatch->longestMatchList; lmi.lte(); lmi++ ) { - if ( lmi->inLmSelect ) { - if ( lmi->action == 0 ) - needDefault = true; - else { - /* Open the action. Write it with the context that sets up _p - * when doing control flow changes from inside the machine. 
*/ - out << " longestMatchId << "\">"; - out << ""; - writeInlineList( lmi->action->inlineList ); - out << "\n"; - } - } - } - - if ( needDefault ) { - out << " " - "\n"; - } - - out << " "; -} - -void XMLCodeGen::writeInlineList( InlineList *inlineList ) -{ - for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { - switch ( item->type ) { - case InlineItem::Text: - writeText( item ); - break; - case InlineItem::Goto: - writeGoto( item ); - break; - case InlineItem::GotoExpr: - writeGotoExpr( item ); - break; - case InlineItem::Call: - writeCall( item ); - break; - case InlineItem::CallExpr: - writeCallExpr( item ); - break; - case InlineItem::Next: - writeNext( item ); - break; - case InlineItem::NextExpr: - writeNextExpr( item ); - break; - case InlineItem::Break: - out << ""; - break; - case InlineItem::Ret: - out << ""; - break; - case InlineItem::PChar: - out << ""; - break; - case InlineItem::Char: - out << ""; - break; - case InlineItem::Curs: - out << ""; - break; - case InlineItem::Targs: - out << ""; - break; - case InlineItem::Entry: - writeEntry( item ); - break; - - case InlineItem::Hold: - out << ""; - break; - case InlineItem::Exec: - writeActionExec( item ); - break; - - case InlineItem::LmSetActId: - out << "" << - item->longestMatchPart->longestMatchId << - ""; - break; - case InlineItem::LmSetTokEnd: - out << "1"; - break; - - case InlineItem::LmOnLast: - writeLmOnLast( item ); - break; - case InlineItem::LmOnNext: - writeLmOnNext( item ); - break; - case InlineItem::LmOnLagBehind: - writeLmOnLagBehind( item ); - break; - case InlineItem::LmSwitch: - writeLmSwitch( item ); - break; - - case InlineItem::LmInitAct: - out << ""; - break; - case InlineItem::LmInitTokStart: - out << ""; - break; - case InlineItem::LmSetTokStart: - out << ""; - break; - - /* Stubbed. 
*/ - case InlineItem::Ncall: - case InlineItem::NcallExpr: - case InlineItem::Nret: - case InlineItem::Nbreak: - case InlineItem::Stmt: - case InlineItem::Subst: - case InlineItem::NfaWrapAction: - case InlineItem::NfaWrapConds: - break; - } - } -} - - -void XMLCodeGen::writeAction( Action *action ) -{ - out << " actionId << "\""; - if ( !action->name.empty() ) - out << " name=\"" << action->name << "\""; - out << " line=\"" << action->loc.line << "\" col=\"" << action->loc.col << "\">"; - writeInlineList( action->inlineList ); - out << "\n"; -} - -void xmlEscapeHost( std::ostream &out, const char *data, long len ) -{ - const char *end = data + len; - while ( data != end ) { - switch ( *data ) { - case '<': out << "<"; break; - case '>': out << ">"; break; - case '&': out << "&"; break; - default: out << *data; break; - } - data += 1; - } -} - -void XMLCodeGen::writeStateActions( StateAp *state ) -{ - RedActionTable *toStateActions = 0; - if ( state->toStateActionTable.length() > 0 ) - toStateActions = actionTableMap.find( state->toStateActionTable ); - - RedActionTable *fromStateActions = 0; - if ( state->fromStateActionTable.length() > 0 ) - fromStateActions = actionTableMap.find( state->fromStateActionTable ); - - /* EOF actions go out here only if the state has no eof target. If it has - * an eof target then an eof transition will be used instead. */ - RedActionTable *eofActions = 0; - if ( state->eofTarget == 0 && state->eofActionTable.length() > 0 ) - eofActions = actionTableMap.find( state->eofActionTable ); - - if ( toStateActions != 0 || fromStateActions != 0 || eofActions != 0 ) { - out << " "; - if ( toStateActions != 0 ) - out << toStateActions->id; - else - out << "x"; - - if ( fromStateActions != 0 ) - out << " " << fromStateActions->id; - else - out << " x"; - - if ( eofActions != 0 ) - out << " " << eofActions->id; - else - out << " x"; - - out << "\n"; - } -} - -void XMLCodeGen::writeStateList() -{ - /* Write the list of states. 
*/ - out << " stateList.length() << "\">\n"; - for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { - out << " alg.stateNum << "\""; - if ( st->isFinState() ) - out << " final=\"t\""; - out << ">\n"; - - writeStateActions( st ); - writeEofTrans( st ); - writeTransList( st ); - - out << " \n"; - - if ( !st.last() ) - out << "\n"; - } - out << " \n"; -} - -bool XMLCodeGen::writeNameInst( NameInst *nameInst ) -{ - bool written = false; - if ( nameInst->parent != 0 ) - written = writeNameInst( nameInst->parent ); - - if ( !nameInst->name.empty() ) { - if ( written ) - out << '_'; - out << nameInst->name; - written = true; - } - - return written; -} - -void XMLCodeGen::writeEntryPoints() -{ - /* List of entry points other than start state. */ - if ( fsm->entryPoints.length() > 0 || pd->lmRequiresErrorState ) { - out << " lmRequiresErrorState ) - out << " error=\"t\""; - out << ">\n"; - for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) { - /* Get the name instantiation from nameIndex. */ - NameInst *nameInst = pd->nameIndex[en->key]; - StateAp *state = en->value; - out << " " << state->alg.stateNum << "\n"; - } - out << " \n"; - } -} - -void XMLCodeGen::writeMachine() -{ - /* Open the machine. */ - out << " \n"; - - /* Action tables. */ - reduceActionTables(); - - writeActionList(); - writeActionTableList(); - writeConditions(); - - /* Start state. */ - out << " " << fsm->startState->alg.stateNum << - "\n"; - - /* Error state. 
*/ - if ( fsm->errState != 0 ) { - out << " " << fsm->errState->alg.stateNum << - "\n"; - } - - writeEntryPoints(); - writeStateList(); - - out << " \n"; -} - - -void XMLCodeGen::writeConditions() -{ - CondData *condData = fsm->ctx->condData; - if ( condData->condSpaceMap.length() > 0 ) { - long nextCondSpaceId = 0; - for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) - cs->condSpaceId = nextCondSpaceId++; - - out << " condSpaceMap.length() << "\">\n"; - for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) { - out << " condSpaceId << - "\" length=\"" << cs->condSet.length() << "\">"; - for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ ) - out << " " << (*csi)->actionId; - out << "\n"; - } - out << " \n"; - } -} - -void XMLCodeGen::writeExports() -{ - if ( pd->exportList.length() > 0 ) { - out << " \n"; - for ( ExportList::Iter exp = pd->exportList; exp.lte(); exp++ ) { - out << " name << "\">"; - writeKey( exp->key ); - out << "\n"; - } - out << " \n"; - } -} - -void XMLCodeGen::writeXML() -{ - /* Open the definition. */ - out << "\n"; - - /* Alphabet type. */ - out << " " << keyOps->alphType->internalName << "\n"; - - /* Getkey expression. */ - if ( pd->getKeyExpr != 0 ) { - out << " "; - writeInlineList( pd->getKeyExpr ); - out << "\n"; - } - - /* Access expression. */ - if ( pd->accessExpr != 0 ) { - out << " "; - writeInlineList( pd->accessExpr ); - out << "\n"; - } - - /* PrePush expression. */ - if ( pd->prePushExpr != 0 ) { - out << " "; - writeInlineList( pd->prePushExpr->inlineList ); - out << "\n"; - } - - /* PostPop expression. */ - if ( pd->postPopExpr != 0 ) { - out << " "; - writeInlineList( pd->postPopExpr->inlineList ); - out << "\n"; - } - - /* - * Variable expressions. 
- */ - - if ( pd->pExpr != 0 ) { - out << " "; - writeInlineList( pd->pExpr ); - out << "\n"; - } - - if ( pd->peExpr != 0 ) { - out << " "; - writeInlineList( pd->peExpr ); - out << "\n"; - } - - if ( pd->eofExpr != 0 ) { - out << " "; - writeInlineList( pd->eofExpr ); - out << "\n"; - } - - if ( pd->csExpr != 0 ) { - out << " "; - writeInlineList( pd->csExpr ); - out << "\n"; - } - - if ( pd->topExpr != 0 ) { - out << " "; - writeInlineList( pd->topExpr ); - out << "\n"; - } - - if ( pd->stackExpr != 0 ) { - out << " "; - writeInlineList( pd->stackExpr ); - out << "\n"; - } - - if ( pd->actExpr != 0 ) { - out << " "; - writeInlineList( pd->actExpr ); - out << "\n"; - } - - if ( pd->tokstartExpr != 0 ) { - out << " "; - writeInlineList( pd->tokstartExpr ); - out << "\n"; - } - - if ( pd->tokendExpr != 0 ) { - out << " "; - writeInlineList( pd->tokendExpr ); - out << "\n"; - } - - if ( pd->dataExpr != 0 ) { - out << " "; - writeInlineList( pd->dataExpr ); - out << "\n"; - } - - writeExports(); - - writeMachine(); - - out << - "\n"; -} - -void InputData::writeLanguage( std::ostream &out ) -{ - out << " lang=\""; - switch ( hostLang->lang ) { - case HostLang::C: out << "C"; break; - case HostLang::D: out << "D"; break; - case HostLang::Go: out << "Go"; break; - case HostLang::Java: out << "Java"; break; - case HostLang::Ruby: out << "Ruby"; break; - case HostLang::CSharp: out << "C#"; break; - case HostLang::OCaml: out << "OCaml"; break; - case HostLang::Crack: out << "Crack"; break; - case HostLang::Asm: out << "ASM"; break; - case HostLang::Rust: out << "Rust"; break; - case HostLang::Julia: out << "Julia"; break; - case HostLang::JS: out << "JavaScript"; break; - } - out << "\""; -} - -void InputData::writeXML( std::ostream &out ) -{ - out << "\n"; - - for ( ParseDataDict::Iter pdel = parseDataDict; pdel.lte(); pdel++ ) { - ParseData *pd = pdel->value; - if ( pd->instanceList.length() > 0 ) - pd->generateXML( *outStream ); - } - - out << "\n"; -} diff --git 
a/ragel/xml.h b/ragel/xml.h deleted file mode 100644 index 60135055..00000000 --- a/ragel/xml.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright 2005-2018 Adrian Thurston - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef _XMLCODEGEN_H -#define _XMLCODEGEN_H - -#if 0 - -#include -#include "avltree.h" -#include "fsmgraph.h" -#include "parsedata.h" -#include "redfsm.h" -#include "gendata.h" - -class XMLCodeGen : protected RedBase -{ -public: - XMLCodeGen( std::string fsmName, int machineId, FsmGbl *id, PdBase *pd, FsmAp *fsm, std::ostream &out ); - - void writeXML( ); - -private: - void writeStateActions( StateAp *state ); - void writeStateList(); - - void writeKey( Key key ); - void writeText( InlineItem *item ); - void writeGoto( InlineItem *item ); - void writeGotoExpr( InlineItem *item ); - void writeCall( InlineItem *item ); - void writeCallExpr( InlineItem *item ); - void writeNext( InlineItem *item ); - void writeNextExpr( InlineItem *item ); - void writeEntry( InlineItem *item ); - void writeLmOnLast( InlineItem *item ); - void writeLmOnNext( InlineItem *item ); - void writeLmOnLagBehind( InlineItem *item ); - - void writeExports(); - bool writeNameInst( NameInst *nameInst ); - void writeEntryPoints(); - void writeConditions(); - void writeInlineList( InlineList *inlineList ); - void writeActionList(); - void writeActionTableList(); - void reduceTrans( TransAp *trans ); - void writeTransList( StateAp *state ); - void writeEofTrans( StateAp *state ); - void writeTrans( Key lowKey, Key highKey, TransAp *defTrans ); - void writeAction( Action *action ); - void writeLmSwitch( InlineItem *item ); - void writeMachine(); - void writeActionExec( InlineItem *item ); - - std::ostream &out; -}; - -#endif - -#endif - -- cgit v1.2.1