summaryrefslogtreecommitdiff
path: root/src/pdarun.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/pdarun.c')
-rw-r--r--src/pdarun.c2265
1 files changed, 2265 insertions, 0 deletions
diff --git a/src/pdarun.c b/src/pdarun.c
new file mode 100644
index 00000000..f1885ec6
--- /dev/null
+++ b/src/pdarun.c
@@ -0,0 +1,2265 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pdarun.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include "config.h"
+#include "debug.h"
+#include "bytecode.h"
+#include "tree.h"
+#include "pool.h"
+#include "internal.h"
+
+#define act_sb 0x1
+#define act_rb 0x2
+
+#define read_word_p( i, p ) do { \
+ i = ((word_t) p[0]); \
+ i |= ((word_t) p[1]) << 8; \
+ i |= ((word_t) p[2]) << 16; \
+ i |= ((word_t) p[3]) << 24; \
+} while(0)
+
+#define read_tree_p( i, p ) do { \
+ word_t w; \
+ w = ((word_t) p[0]); \
+ w |= ((word_t) p[1]) << 8; \
+ w |= ((word_t) p[2]) << 16; \
+ w |= ((word_t) p[3]) << 24; \
+ i = (tree_t*)w; \
+} while(0)
+
+/* bit 0: data needed. bit 1: loc needed */
+#define RN_NONE 0x0
+#define RN_DATA 0x1
+#define RN_LOC 0x2
+#define RN_BOTH 0x3
+
+
+static void init_fsm_run( program_t *prg, struct pda_run *pda_run )
+{
+ pda_run->fsm_tables = prg->rtd->fsm_tables;
+
+ pda_run->consume_buf = 0;
+
+ pda_run->p = pda_run->pe = 0;
+ pda_run->tokpref = 0;
+ pda_run->scan_eof = 0;
+
+ pda_run->pre_region = -1;
+}
+
+static void clear_fsm_run( program_t *prg, struct pda_run *pda_run )
+{
+ if ( pda_run->consume_buf != 0 ) {
+ /* Transfer the run buf list to the program */
+ struct run_buf *head = pda_run->consume_buf;
+ struct run_buf *tail = head;
+ while ( tail->next != 0 )
+ tail = tail->next;
+
+ tail->next = prg->alloc_run_buf;
+ prg->alloc_run_buf = head;
+ }
+}
+
+void colm_increment_steps( struct pda_run *pda_run )
+{
+ pda_run->steps += 1;
+ //debug( prg, REALM_PARSE, "steps up to %ld\n", pdaRun->steps );
+}
+
+void colm_decrement_steps( struct pda_run *pda_run )
+{
+ pda_run->steps -= 1;
+ //debug( prg, REALM_PARSE, "steps down to %ld\n", pdaRun->steps );
+}
+
+head_t *colm_stream_pull( program_t *prg, tree_t **sp, struct pda_run *pda_run,
+ struct input_impl *is, long length )
+{
+ if ( pda_run != 0 ) {
+ struct run_buf *run_buf = pda_run->consume_buf;
+ if ( length > ( FSM_BUFSIZE - run_buf->length ) ) {
+ run_buf = new_run_buf( 0 );
+ run_buf->next = pda_run->consume_buf;
+ pda_run->consume_buf = run_buf;
+ }
+
+ alph_t *dest = run_buf->data + run_buf->length;
+
+ is->funcs->get_data( prg, is, dest, length );
+ location_t *loc = location_allocate( prg );
+ is->funcs->consume_data( prg, is, length, loc );
+
+ run_buf->length += length;
+
+ pda_run->p = pda_run->pe = 0;
+ pda_run->tokpref = 0;
+
+ head_t *tokdata = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length );
+ tokdata->location = loc;
+
+ return tokdata;
+ }
+ else {
+ head_t *head = init_str_space( length );
+ alph_t *dest = (alph_t*)head->data;
+
+ is->funcs->get_data( prg, is, dest, length );
+ location_t *loc = location_allocate( prg );
+ is->funcs->consume_data( prg, is, length, loc );
+ head->location = loc;
+
+ return head;
+ }
+}
+
+/* Should only be sending back whole tokens/ignores, therefore the send back
+ * should never cross a buffer boundary. Either we slide back data, or we move to
+ * a previous buffer and slide back data. */
+static void send_back_text( struct colm_program *prg, struct input_impl *is, const alph_t *data, long length )
+{
+ //debug( REALM_PARSE, "push back of %ld characters\n", length );
+
+ if ( length == 0 )
+ return;
+
+ //debug( REALM_PARSE, "sending back text: %.*s\n",
+ // (int)length, data );
+
+ is->funcs->undo_consume_data( prg, is, data, length );
+}
+
+static void send_back_tree( struct colm_program *prg, struct input_impl *is, tree_t *tree )
+{
+ is->funcs->undo_consume_tree( prg, is, tree, false );
+}
+
+/*
+ * Stops on:
+ * PCR_REVERSE
+ */
+static void send_back_ignore( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, struct input_impl *is, parse_tree_t *parse_tree )
+{
+ #ifdef DEBUG
+ struct lang_el_info *lel_info = prg->rtd->lel_info;
+ debug( prg, REALM_PARSE, "sending back: %s%s\n",
+ lel_info[parse_tree->shadow->tree->id].name,
+ parse_tree->flags & PF_ARTIFICIAL ? " (artificial)" : "" );
+ #endif
+
+ head_t *head = parse_tree->shadow->tree->tokdata;
+ int artificial = parse_tree->flags & PF_ARTIFICIAL;
+
+ if ( head != 0 ) {
+ if ( artificial )
+ send_back_tree( prg, is, parse_tree->shadow->tree );
+ else
+ send_back_text( prg, is, colm_alph_from_cstr( string_data( head ) ), head->length );
+ }
+
+ colm_decrement_steps( pda_run );
+
+ /* Check for reverse code. */
+ if ( parse_tree->flags & PF_HAS_RCODE ) {
+ pda_run->on_deck = true;
+ parse_tree->flags &= ~PF_HAS_RCODE;
+ }
+
+ if ( pda_run->steps == pda_run->target_steps ) {
+ debug( prg, REALM_PARSE, "trigger parse stop, steps = "
+ "target = %d\n", pda_run->target_steps );
+ pda_run->stop = true;
+ }
+}
+
+static void reset_token( struct pda_run *pda_run )
+{
+ /* If there is a token started, but never finished for a lack of data, we
+ * must first backup over it. */
+ if ( pda_run->tokstart != 0 ) {
+ pda_run->p = pda_run->pe = 0;
+ pda_run->tokpref = 0;
+ pda_run->scan_eof = 0;
+ }
+}
+
+/* Stops on:
+ * PCR_REVERSE
+ */
+
+static void send_back( program_t *prg, tree_t **sp, struct pda_run *pda_run,
+ struct input_impl *is, parse_tree_t *parse_tree )
+{
+ debug( prg, REALM_PARSE, "sending back: %s\n",
+ prg->rtd->lel_info[parse_tree->id].name );
+
+ if ( parse_tree->flags & PF_NAMED ) {
+ /* Send the named lang el back first, then send back any leading
+ * whitespace. */
+ is->funcs->undo_consume_lang_el( prg, is );
+ }
+
+ colm_decrement_steps( pda_run );
+
+ /* Artifical were not parsed, instead sent in as items. */
+ if ( parse_tree->flags & PF_ARTIFICIAL ) {
+ /* Check for reverse code. */
+ if ( parse_tree->flags & PF_HAS_RCODE ) {
+ debug( prg, REALM_PARSE, "tree has rcode, setting on deck\n" );
+ pda_run->on_deck = true;
+ parse_tree->flags &= ~PF_HAS_RCODE;
+ }
+
+ colm_tree_upref( prg, parse_tree->shadow->tree );
+
+ send_back_tree( prg, is, parse_tree->shadow->tree );
+ }
+ else {
+ /* Check for reverse code. */
+ if ( parse_tree->flags & PF_HAS_RCODE ) {
+ debug( prg, REALM_PARSE, "tree has rcode, setting on deck\n" );
+ pda_run->on_deck = true;
+ parse_tree->flags &= ~PF_HAS_RCODE;
+ }
+
+ /* Push back the token data. */
+ send_back_text( prg, is, colm_alph_from_cstr( string_data( parse_tree->shadow->tree->tokdata ) ),
+ string_length( parse_tree->shadow->tree->tokdata ) );
+
+ /* If eof was just sent back remember that it needs to be sent again. */
+ if ( parse_tree->id == prg->rtd->eof_lel_ids[pda_run->parser_id] )
+ pda_run->eof_term_recvd = false;
+
+ /* If the item is bound then store remove it from the bindings array. */
+ prg->rtd->pop_binding( pda_run, parse_tree );
+ }
+
+ if ( pda_run->steps == pda_run->target_steps ) {
+ debug( prg, REALM_PARSE, "trigger parse stop, "
+ "steps = target = %d\n", pda_run->target_steps );
+ pda_run->stop = true;
+ }
+
+ /* Downref the tree that was sent back and free the kid. */
+ colm_tree_downref( prg, sp, parse_tree->shadow->tree );
+ kid_free( prg, parse_tree->shadow );
+ parse_tree_free( pda_run, parse_tree );
+}
+
+static void set_region( struct pda_run *pda_run, int empty_ignore, parse_tree_t *tree )
+{
+ if ( empty_ignore ) {
+ /* Recording the next region. */
+ tree->retry_region = pda_run->next_region_ind;
+ if ( pda_run->pda_tables->token_regions[tree->retry_region+1] != 0 )
+ pda_run->num_retry += 1;
+ }
+}
+
+static void ignore_tree( program_t *prg, struct pda_run *pda_run, tree_t *tree )
+{
+ int empty_ignore = pda_run->accum_ignore == 0;
+
+ colm_increment_steps( pda_run );
+
+ parse_tree_t *parse_tree = parse_tree_allocate( pda_run );
+ parse_tree->shadow = kid_allocate( prg );
+ parse_tree->shadow->tree = tree;
+
+ parse_tree->next = pda_run->accum_ignore;
+ pda_run->accum_ignore = parse_tree;
+
+ colm_transfer_reverse_code( pda_run, parse_tree );
+
+ if ( pda_run->pre_region >= 0 )
+ parse_tree->flags |= PF_RIGHT_IGNORE;
+
+ set_region( pda_run, empty_ignore, pda_run->accum_ignore );
+}
+
+static void ignore_tree_art( program_t *prg, struct pda_run *pda_run, tree_t *tree )
+{
+ int empty_ignore = pda_run->accum_ignore == 0;
+
+ colm_increment_steps( pda_run );
+
+ parse_tree_t *parse_tree = parse_tree_allocate( pda_run );
+ parse_tree->flags |= PF_ARTIFICIAL;
+ parse_tree->shadow = kid_allocate( prg );
+ parse_tree->shadow->tree = tree;
+
+ parse_tree->next = pda_run->accum_ignore;
+ pda_run->accum_ignore = parse_tree;
+
+ colm_transfer_reverse_code( pda_run, parse_tree );
+
+ set_region( pda_run, empty_ignore, pda_run->accum_ignore );
+}
+
+kid_t *make_token_with_data( program_t *prg, struct pda_run *pda_run,
+ struct input_impl *is, int id, head_t *tokdata )
+{
+ /* Make the token object. */
+ long object_length = prg->rtd->lel_info[id].object_length;
+ kid_t *attrs = alloc_attrs( prg, object_length );
+
+ kid_t *input = 0;
+ input = kid_allocate( prg );
+ input->tree = tree_allocate( prg );
+
+ debug( prg, REALM_PARSE, "made token %p\n", input->tree );
+
+ input->tree->refs = 1;
+ input->tree->id = id;
+ input->tree->tokdata = tokdata;
+
+ /* No children and ignores get added later. */
+ input->tree->child = attrs;
+
+ struct lang_el_info *lel_info = prg->rtd->lel_info;
+ if ( lel_info[id].num_capture_attr > 0 ) {
+ int i;
+ for ( i = 0; i < lel_info[id].num_capture_attr; i++ ) {
+ CaptureAttr *ca = &prg->rtd->capture_attr[lel_info[id].capture_attr + i];
+ head_t *data = string_alloc_full( prg,
+ colm_cstr_from_alph( pda_run->mark[ca->mark_enter] ),
+ pda_run->mark[ca->mark_leave] -
+ pda_run->mark[ca->mark_enter] );
+ tree_t *string = construct_string( prg, data );
+ colm_tree_upref( prg, string );
+ colm_tree_set_field( prg, input->tree, ca->offset, string );
+ }
+ }
+
+ return input;
+}
+
+static void report_parse_error( program_t *prg, tree_t **sp, struct pda_run *pda_run )
+{
+ kid_t *kid = pda_run->bt_point;
+ head_t *deepest = 0;
+ while ( kid != 0 ) {
+ head_t *head = kid->tree->tokdata;
+ if ( head != 0 && head->location != 0 ) {
+ if ( deepest == 0 || head->location->byte > deepest->location->byte )
+ deepest = head;
+ }
+ kid = kid->next;
+ }
+
+ head_t *error_head = 0;
+
+ /* If there are no error points on record assume the error occurred at the
+ * beginning of the stream. */
+ if ( deepest == 0 ) {
+ error_head = string_alloc_full( prg, "<input>:1:1: parse error", 32 );
+ error_head->location = location_allocate( prg );
+ error_head->location->line = 1;
+ error_head->location->column = 1;
+ }
+ else {
+ debug( prg, REALM_PARSE, "deepest location byte: %d\n",
+ deepest->location->byte );
+
+ const char *name = deepest->location->name;
+ long line = deepest->location->line;
+ long i, column = deepest->location->column;
+ long byte = deepest->location->byte;
+
+ for ( i = 0; i < deepest->length; i++ ) {
+ if ( deepest->data[i] != '\n' )
+ column += 1;
+ else {
+ line += 1;
+ column = 1;
+ }
+ byte += 1;
+ }
+
+ if ( name == 0 )
+ name = "<input>";
+ char *formatted = malloc( strlen( name ) + 128 );
+ sprintf( formatted, "%s:%ld:%ld: parse error", name, line, column );
+ error_head = string_alloc_full( prg, formatted, strlen(formatted) );
+ free( formatted );
+
+ error_head->location = location_allocate( prg );
+
+ error_head->location->name = deepest->location->name;
+ error_head->location->line = line;
+ error_head->location->column = column;
+ error_head->location->byte = byte;
+ }
+
+ tree_t *tree = construct_string( prg, error_head );
+ colm_tree_downref( prg, sp, pda_run->parse_error_text );
+ pda_run->parse_error_text = tree;
+ colm_tree_upref( prg, pda_run->parse_error_text );
+}
+
+static void attach_right_ignore( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, parse_tree_t *parse_tree )
+{
+ if ( pda_run->accum_ignore == 0 )
+ return;
+
+ if ( pda_run->stack_top->id > 0 &&
+ pda_run->stack_top->id < prg->rtd->first_non_term_id )
+ {
+ /* OK, do it */
+ debug( prg, REALM_PARSE, "attaching right ignore\n" );
+
+ /* Reset. */
+ assert( ! ( parse_tree->flags & PF_RIGHT_IL_ATTACHED ) );
+
+ parse_tree_t *accum = pda_run->accum_ignore;
+
+ parse_tree_t *stop_at = 0, *use = accum;
+ while ( use != 0 ) {
+ if ( ! (use->flags & PF_RIGHT_IGNORE) )
+ stop_at = use;
+ use = use->next;
+ }
+
+ if ( stop_at != 0 ) {
+ /* Stop at was set. Make it the last item in the igore list. Take
+ * the rest. */
+ accum = stop_at->next;
+ stop_at->next = 0;
+ }
+ else {
+ /* Stop at was never set. All right ignore. Use it all. */
+ pda_run->accum_ignore = 0;
+ }
+
+ /* The data list needs to be extracted and reversed. The parse tree list
+ * can remain in stack order. */
+ parse_tree_t *child = accum, *last = 0;
+ kid_t *data_child = 0, *data_last = 0;
+
+ while ( child ) {
+ data_child = child->shadow;
+ parse_tree_t *next = child->next;
+
+ /* Reverse the lists. */
+ data_child->next = data_last;
+ child->next = last;
+
+ /* Detach the parse tree from the data tree. */
+ child->shadow = 0;
+
+ /* Keep the last for reversal. */
+ data_last = data_child;
+ last = child;
+
+ child = next;
+ }
+
+ /* Last is now the first. */
+ parse_tree->right_ignore = last;
+
+ if ( data_child != 0 ) {
+ debug( prg, REALM_PARSE, "attaching ignore right\n" );
+
+ kid_t *ignore_kid = data_last;
+
+ /* Copy the ignore list first if we need to attach it as a right
+ * ignore. */
+ tree_t *right_ignore = 0;
+
+ right_ignore = tree_allocate( prg );
+ right_ignore->id = LEL_ID_IGNORE;
+ right_ignore->child = ignore_kid;
+
+ tree_t *push_to = parse_tree->shadow->tree;
+
+ push_to = push_right_ignore( prg, push_to, right_ignore );
+
+ parse_tree->shadow->tree = push_to;
+
+ parse_tree->flags |= PF_RIGHT_IL_ATTACHED;
+ }
+ }
+}
+
+static void attach_left_ignore( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, parse_tree_t *parse_tree )
+{
+ /* Reset. */
+ assert( ! ( parse_tree->flags & PF_LEFT_IL_ATTACHED ) );
+
+ parse_tree_t *accum = pda_run->accum_ignore;
+ pda_run->accum_ignore = 0;
+
+ /* The data list needs to be extracted and reversed. The parse tree list
+ * can remain in stack order. */
+ parse_tree_t *child = accum, *last = 0;
+ kid_t *data_child = 0, *data_last = 0;
+
+ while ( child ) {
+ data_child = child->shadow;
+ parse_tree_t *next = child->next;
+
+ /* Reverse the lists. */
+ data_child->next = data_last;
+ child->next = last;
+
+ /* Detach the parse tree from the data tree. */
+ child->shadow = 0;
+
+ /* Keep the last for reversal. */
+ data_last = data_child;
+ last = child;
+
+ child = next;
+ }
+
+ /* Last is now the first. */
+ parse_tree->left_ignore = last;
+
+ if ( data_child != 0 ) {
+ debug( prg, REALM_PARSE, "attaching left ignore\n" );
+
+ kid_t *ignore_kid = data_child;
+
+ /* Make the ignore list for the left-ignore. */
+ tree_t *left_ignore = tree_allocate( prg );
+ left_ignore->id = LEL_ID_IGNORE;
+ left_ignore->child = ignore_kid;
+
+ tree_t *push_to = parse_tree->shadow->tree;
+
+ push_to = push_left_ignore( prg, push_to, left_ignore );
+
+ parse_tree->shadow->tree = push_to;
+
+ parse_tree->flags |= PF_LEFT_IL_ATTACHED;
+ }
+}
+
+/* Not currently used. Need to revive this. WARNING: untested changes here */
+static void detach_right_ignore( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, parse_tree_t *parse_tree )
+{
+ /* Right ignore are immediately discarded since they are copies of
+ * left-ignores. */
+ tree_t *right_ignore = 0;
+ if ( parse_tree->flags & PF_RIGHT_IL_ATTACHED ) {
+ tree_t *pop_from = parse_tree->shadow->tree;
+
+ pop_from = pop_right_ignore( prg, sp, pop_from, &right_ignore );
+
+ parse_tree->shadow->tree = pop_from;
+
+ parse_tree->flags &= ~PF_RIGHT_IL_ATTACHED;
+ }
+
+ if ( parse_tree->right_ignore != 0 ) {
+ assert( right_ignore != 0 );
+
+ /* Transfer the trees to accumIgnore. */
+ parse_tree_t *ignore = parse_tree->right_ignore;
+ parse_tree->right_ignore = 0;
+
+ kid_t *data_ignore = right_ignore->child;
+ right_ignore->child = 0;
+
+ parse_tree_t *last = 0;
+ kid_t *data_last = 0;
+ while ( ignore != 0 ) {
+ parse_tree_t *next = ignore->next;
+ kid_t *data_next = data_ignore->next;
+
+ /* Put the data trees underneath the parse trees. */
+ ignore->shadow = data_ignore;
+
+ /* Reverse. */
+ ignore->next = last;
+ data_ignore->next = data_last;
+
+ /* Keep last for reversal. */
+ last = ignore;
+ data_last = data_ignore;
+
+ ignore = next;
+ data_ignore = data_next;
+ }
+
+ pda_run->accum_ignore = last;
+
+ colm_tree_downref( prg, sp, right_ignore );
+ }
+}
+
+static void detach_left_ignore( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, parse_tree_t *parse_tree )
+{
+ /* Detach left. */
+ tree_t *left_ignore = 0;
+ if ( parse_tree->flags & PF_LEFT_IL_ATTACHED ) {
+ tree_t *pop_from = parse_tree->shadow->tree;
+
+ pop_from = pop_left_ignore( prg, sp, pop_from, &left_ignore );
+
+ parse_tree->shadow->tree = pop_from;
+
+ parse_tree->flags &= ~PF_LEFT_IL_ATTACHED;
+ }
+
+ if ( parse_tree->left_ignore != 0 ) {
+ assert( left_ignore != 0 );
+
+ /* Transfer the trees to accumIgnore. */
+ parse_tree_t *ignore = parse_tree->left_ignore;
+ parse_tree->left_ignore = 0;
+
+ kid_t *data_ignore = left_ignore->child;
+ left_ignore->child = 0;
+
+ parse_tree_t *last = 0;
+ kid_t *data_last = 0;
+ while ( ignore != 0 ) {
+ parse_tree_t *next = ignore->next;
+ kid_t *data_next = data_ignore->next;
+
+ /* Put the data trees underneath the parse trees. */
+ ignore->shadow = data_ignore;
+
+ /* Reverse. */
+ ignore->next = last;
+ data_ignore->next = data_last;
+
+ /* Keep last for reversal. */
+ last = ignore;
+ data_last = data_ignore;
+
+ ignore = next;
+ data_ignore = data_next;
+ }
+
+ pda_run->accum_ignore = last;
+ }
+
+ colm_tree_downref( prg, sp, left_ignore );
+}
+
+static int is_parser_stop_finished( struct pda_run *pda_run )
+{
+ int done =
+ pda_run->stack_top->next != 0 &&
+ pda_run->stack_top->next->next == 0 &&
+ pda_run->stack_top->id == pda_run->stop_target;
+ return done;
+}
+
+static void handle_error( program_t *prg, tree_t **sp, struct pda_run *pda_run )
+{
+ /* Check the result. */
+ if ( pda_run->parse_error ) {
+ /* Error occured in the top-level parser. */
+ report_parse_error( prg, sp, pda_run );
+ }
+ else {
+ if ( is_parser_stop_finished( pda_run ) ) {
+ debug( prg, REALM_PARSE, "stopping the parse\n" );
+ pda_run->stop_parsing = true;
+ }
+ }
+}
+
+static head_t *extract_match( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, struct input_impl *is )
+{
+ long length = pda_run->tokend;
+
+ //debug( prg, REALM_PARSE, "extracting token of length: %ld\n", length );
+
+ struct run_buf *run_buf = pda_run->consume_buf;
+ if ( run_buf == 0 || length > ( FSM_BUFSIZE - run_buf->length ) ) {
+ run_buf = new_run_buf( length );
+ run_buf->next = pda_run->consume_buf;
+ pda_run->consume_buf = run_buf;
+ }
+
+ alph_t *dest = run_buf->data + run_buf->length;
+
+ is->funcs->get_data( prg, is, (alph_t*)dest, length );
+ location_t *location = location_allocate( prg );
+ is->funcs->consume_data( prg, is, length, location );
+
+ run_buf->length += length;
+
+ pda_run->p = pda_run->pe = 0;
+ pda_run->tokpref = 0;
+ pda_run->tokstart = 0;
+
+ head_t *head = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length );
+
+ head->location = location;
+
+ debug( prg, REALM_PARSE, "location byte: %d\n", head->location->byte );
+
+ return head;
+}
+
+static head_t *extract_no_d( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, struct input_impl *is )
+{
+ long length = pda_run->tokend;
+
+ /* Just a consume, no data allocate. */
+ location_t *location = location_allocate( prg );
+ is->funcs->consume_data( prg, is, length, location );
+
+ pda_run->p = pda_run->pe = 0;
+ pda_run->tokpref = 0;
+ pda_run->tokstart = 0;
+
+ head_t *head = colm_string_alloc_pointer( prg, 0, 0 );
+
+ head->location = location;
+
+ debug( prg, REALM_PARSE, "location byte: %d\n", head->location->byte );
+
+ return head;
+}
+
+static head_t *extract_no_l( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, struct input_impl *is )
+{
+ long length = pda_run->tokend;
+
+ //debug( prg, REALM_PARSE, "extracting token of length: %ld\n", length );
+
+ struct run_buf *run_buf = pda_run->consume_buf;
+ if ( run_buf == 0 || length > ( FSM_BUFSIZE - run_buf->length ) ) {
+ run_buf = new_run_buf( length );
+ run_buf->next = pda_run->consume_buf;
+ pda_run->consume_buf = run_buf;
+ }
+
+ alph_t *dest = run_buf->data + run_buf->length;
+
+ is->funcs->get_data( prg, is, dest, length );
+
+ /* Using a dummpy location. */
+ location_t location;
+ memset( &location, 0, sizeof( location ) );
+ is->funcs->consume_data( prg, is, length, &location );
+
+ run_buf->length += length;
+
+ pda_run->p = pda_run->pe = 0;
+ pda_run->tokpref = 0;
+ pda_run->tokstart = 0;
+
+ head_t *head = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length );
+
+ /* Don't pass the location. */
+ head->location = 0;
+
+ debug( prg, REALM_PARSE, "location byte: %d\n", location.byte );
+
+ return head;
+}
+
+static head_t *consume_match( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, struct input_impl *is )
+{
+ long length = pda_run->tokend;
+
+ /* No data or location returned. We just consume the data. */
+ location_t dummy_loc;
+ memset( &dummy_loc, 0, sizeof(dummy_loc) );
+ is->funcs->consume_data( prg, is, length, &dummy_loc );
+
+ pda_run->p = pda_run->pe = 0;
+ pda_run->tokpref = 0;
+ pda_run->tokstart = 0;
+
+ debug( prg, REALM_PARSE, "location byte: %d\n", dummy_loc.byte );
+
+ return 0;
+}
+
+
+static head_t *peek_match( program_t *prg, struct pda_run *pda_run, struct input_impl *is )
+{
+ long length = pda_run->tokend;
+
+ struct run_buf *run_buf = pda_run->consume_buf;
+ if ( run_buf == 0 || length > ( FSM_BUFSIZE - run_buf->length ) ) {
+ run_buf = new_run_buf( 0 );
+ run_buf->next = pda_run->consume_buf;
+ pda_run->consume_buf = run_buf;
+ }
+
+ alph_t *dest = run_buf->data + run_buf->length;
+
+ is->funcs->get_data( prg, is, dest, length );
+
+ pda_run->p = pda_run->pe = 0;
+ pda_run->tokpref = 0;
+
+ head_t *head = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length );
+
+ head->location = location_allocate( prg );
+ is->funcs->transfer_loc( prg, head->location, is );
+
+ debug( prg, REALM_PARSE, "location byte: %d\n", head->location->byte );
+
+ return head;
+}
+
+
+static void send_ignore( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, struct input_impl *is, long id )
+{
+ if ( prg->rtd->reducer_need_ign( prg, pda_run ) == RN_NONE ) {
+ consume_match( prg, sp, pda_run, is );
+ }
+ else {
+ debug( prg, REALM_PARSE, "ignoring: %s\n", prg->rtd->lel_info[id].name );
+
+ /* Make the ignore string. */
+ head_t *ignore_str = extract_match( prg, sp, pda_run, is );
+
+ debug( prg, REALM_PARSE, "ignoring: %.*s\n", ignore_str->length, ignore_str->data );
+
+ tree_t *tree = tree_allocate( prg );
+ tree->refs = 1;
+ tree->id = id;
+ tree->tokdata = ignore_str;
+
+ /* Send it to the pdaRun. */
+ ignore_tree( prg, pda_run, tree );
+ }
+}
+
+static void send_token( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, struct input_impl *is, long id )
+{
+ int empty_ignore = pda_run->accum_ignore == 0;
+
+ /* Make the token data. */
+ head_t *tokdata = 0;
+ int rn = prg->rtd->reducer_need_tok( prg, pda_run, id );
+
+ switch ( rn ) {
+ case RN_NONE:
+ tokdata = consume_match( prg, sp, pda_run, is );
+ break;
+ case RN_DATA:
+ tokdata = extract_no_l( prg, sp, pda_run, is );
+ break;
+ case RN_LOC:
+ tokdata = extract_no_d( prg, sp, pda_run, is );
+ break;
+ case RN_BOTH:
+ tokdata = extract_match( prg, sp, pda_run, is );
+ break;
+ }
+
+ debug( prg, REALM_PARSE, "token: %s text: %.*s\n",
+ prg->rtd->lel_info[id].name,
+ string_length(tokdata), string_data(tokdata) );
+
+ kid_t *input = make_token_with_data( prg, pda_run, is, id, tokdata );
+
+ colm_increment_steps( pda_run );
+
+ parse_tree_t *parse_tree = parse_tree_allocate( pda_run );
+ parse_tree->id = input->tree->id;
+ parse_tree->shadow = input;
+
+ pda_run->parse_input = parse_tree;
+
+ /* Store any alternate scanning region. */
+ if ( input != 0 && pda_run->pda_cs >= 0 )
+ set_region( pda_run, empty_ignore, parse_tree );
+}
+
+static void send_tree( program_t *prg, tree_t **sp, struct pda_run *pda_run,
+ struct input_impl *is )
+{
+ kid_t *input = kid_allocate( prg );
+ input->tree = is->funcs->consume_tree( prg, is );
+
+ colm_increment_steps( pda_run );
+
+ parse_tree_t *parse_tree = parse_tree_allocate( pda_run );
+ parse_tree->id = input->tree->id;
+ parse_tree->flags |= PF_ARTIFICIAL;
+ parse_tree->shadow = input;
+
+ pda_run->parse_input = parse_tree;
+}
+
+static void send_ignore_tree( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, struct input_impl *is )
+{
+ tree_t *tree = is->funcs->consume_tree( prg, is );
+ ignore_tree_art( prg, pda_run, tree );
+}
+
+static void send_collect_ignore( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, struct input_impl *is, int id )
+{
+ debug( prg, REALM_PARSE, "token: CI\n" );
+
+ int empty_ignore = pda_run->accum_ignore == 0;
+
+ /* Make the token data. */
+ head_t *tokdata = head_allocate( prg );
+ tokdata->location = location_allocate( prg );
+ is->funcs->transfer_loc( prg, tokdata->location, is );
+
+ debug( prg, REALM_PARSE, "token: %s text: %.*s\n",
+ prg->rtd->lel_info[id].name,
+ string_length(tokdata), string_data(tokdata) );
+
+ kid_t *input = make_token_with_data( prg, pda_run, is, id, tokdata );
+
+ colm_increment_steps( pda_run );
+
+ parse_tree_t *parse_tree = parse_tree_allocate( pda_run );
+ parse_tree->id = input->tree->id;
+ parse_tree->shadow = input;
+
+ pda_run->parse_input = parse_tree;
+
+ /* Store any alternate scanning region. */
+ if ( input != 0 && pda_run->pda_cs >= 0 )
+ set_region( pda_run, empty_ignore, parse_tree );
+}
+
+/* Offset can be used to look at the next nextRegionInd. */
+static int get_next_region( struct pda_run *pda_run, int offset )
+{
+ return pda_run->pda_tables->token_regions[pda_run->next_region_ind+offset];
+}
+
+static int get_next_pre_region( struct pda_run *pda_run )
+{
+ return pda_run->pda_tables->token_pre_regions[pda_run->next_region_ind];
+}
+
+static void send_eof( program_t *prg, tree_t **sp, struct pda_run *pda_run,
+ struct input_impl *is )
+{
+ debug( prg, REALM_PARSE, "token: _EOF\n" );
+
+ colm_increment_steps( pda_run );
+
+ head_t *head = head_allocate( prg );
+ head->location = location_allocate( prg );
+ is->funcs->transfer_loc( prg, head->location, is );
+
+ kid_t *input = kid_allocate( prg );
+ input->tree = tree_allocate( prg );
+
+ input->tree->refs = 1;
+ input->tree->id = prg->rtd->eof_lel_ids[pda_run->parser_id];
+ input->tree->tokdata = head;
+
+ /* Set the state using the state of the parser. */
+ pda_run->region = get_next_region( pda_run, 0 );
+ pda_run->pre_region = get_next_pre_region( pda_run );
+ pda_run->fsm_cs = pda_run->fsm_tables->entry_by_region[pda_run->region];
+
+ parse_tree_t *parse_tree = parse_tree_allocate( pda_run );
+ parse_tree->id = input->tree->id;
+ parse_tree->shadow = input;
+
+ pda_run->parse_input = parse_tree;
+}
+
+static void new_token( program_t *prg, struct pda_run *pda_run )
+{
+ pda_run->p = pda_run->pe = 0;
+ pda_run->tokpref = 0;
+ pda_run->scan_eof = 0;
+
+ /* Init the scanner vars. */
+ pda_run->act = 0;
+ pda_run->tokstart = 0;
+ pda_run->tokend = 0;
+ pda_run->matched_token = 0;
+
+ /* Set the state using the state of the parser. */
+ pda_run->region = get_next_region( pda_run, 0 );
+ pda_run->pre_region = get_next_pre_region( pda_run );
+ if ( pda_run->pre_region > 0 ) {
+ pda_run->fsm_cs = pda_run->fsm_tables->entry_by_region[pda_run->pre_region];
+ pda_run->next_cs = pda_run->fsm_tables->entry_by_region[pda_run->region];
+ }
+ else {
+ pda_run->fsm_cs = pda_run->fsm_tables->entry_by_region[pda_run->region];
+ }
+
+
+ /* Clear the mark array. */
+ memset( pda_run->mark, 0, sizeof(pda_run->mark) );
+}
+
+static void push_bt_point( program_t *prg, struct pda_run *pda_run )
+{
+ tree_t *tree = 0;
+ if ( pda_run->accum_ignore != 0 )
+ tree = pda_run->accum_ignore->shadow->tree;
+ else if ( pda_run->token_list != 0 )
+ tree = pda_run->token_list->kid->tree;
+
+ if ( tree != 0 ) {
+ debug( prg, REALM_PARSE, "pushing bt point with location byte %d\n",
+ ( tree != 0 && tree->tokdata != 0 && tree->tokdata->location != 0 ) ?
+ tree->tokdata->location->byte : 0 );
+
+ kid_t *kid = kid_allocate( prg );
+ kid->tree = tree;
+ colm_tree_upref( prg, tree );
+ kid->next = pda_run->bt_point;
+ pda_run->bt_point = kid;
+ }
+}
+
+
+#define SCAN_UNDO -7
+#define SCAN_IGNORE -6
+#define SCAN_TREE -5
+#define SCAN_TRY_AGAIN_LATER -4
+#define SCAN_ERROR -3
+#define SCAN_LANG_EL -2
+#define SCAN_EOF -1
+
+static long scan_token( program_t *prg, struct pda_run *pda_run, struct input_impl *is )
+{
+ if ( pda_run->trigger_undo )
+ return SCAN_UNDO;
+
+ while ( true ) {
+ alph_t *pd = 0;
+ int len = 0;
+ int tokpref = pda_run->tokpref;
+ int type = is->funcs->get_parse_block( prg, is, &tokpref, &pd, &len );
+
+ switch ( type ) {
+ case INPUT_DATA:
+ pda_run->p = pd;
+ pda_run->pe = pd + len;
+ break;
+
+ case INPUT_EOS:
+ pda_run->p = pda_run->pe = 0;
+ if ( pda_run->tokstart != 0 )
+ pda_run->scan_eof = 1;
+ debug( prg, REALM_SCAN, "EOS *******************\n" );
+ break;
+
+ case INPUT_EOF:
+ pda_run->p = pda_run->pe = 0;
+ if ( pda_run->tokstart != 0 )
+ pda_run->scan_eof = 1;
+ else
+ return SCAN_EOF;
+ break;
+
+ case INPUT_EOD:
+ pda_run->p = pda_run->pe = 0;
+ return SCAN_TRY_AGAIN_LATER;
+
+ case INPUT_LANG_EL:
+ if ( pda_run->tokstart != 0 )
+ pda_run->scan_eof = 1;
+ else
+ return SCAN_LANG_EL;
+ break;
+
+ case INPUT_TREE:
+ if ( pda_run->tokstart != 0 )
+ pda_run->scan_eof = 1;
+ else
+ return SCAN_TREE;
+ break;
+ case INPUT_IGNORE:
+ if ( pda_run->tokstart != 0 )
+ pda_run->scan_eof = 1;
+ else
+ return SCAN_IGNORE;
+ break;
+ }
+
+ prg->rtd->fsm_execute( pda_run, is );
+
+ /* First check if scanning stopped because we have a token. */
+ if ( pda_run->matched_token > 0 ) {
+ /* If the token has a marker indicating the end (due to trailing
+ * context) then adjust data now. */
+ struct lang_el_info *lel_info = prg->rtd->lel_info;
+ if ( lel_info[pda_run->matched_token].mark_id >= 0 )
+ pda_run->p = pda_run->mark[lel_info[pda_run->matched_token].mark_id];
+
+ return pda_run->matched_token;
+ }
+
+ /* Check for error. */
+ if ( pda_run->fsm_cs == pda_run->fsm_tables->error_state ) {
+ /* If a token was started, but not finished (tokstart != 0) then
+ * restore data to the beginning of that token. */
+ if ( pda_run->tokstart != 0 )
+ pda_run->p = pda_run->tokstart;
+
+ /* Check for a default token in the region. If one is there
+ * then send it and continue with the processing loop. */
+ if ( prg->rtd->region_info[pda_run->region].default_token >= 0 ) {
+ pda_run->tokpref = 0;
+ return prg->rtd->region_info[pda_run->region].default_token;
+ }
+
+ return SCAN_ERROR;
+ }
+
+ /* Check for no match on eof (trailing data that partially matches a token). */
+ if ( pda_run->scan_eof )
+ return SCAN_ERROR;
+
+ /* Got here because the state machine didn't match a token or encounter
+ * an error. Must be because we got to the end of the buffer data. */
+ assert( pda_run->p == pda_run->pe );
+ }
+
+ /* Should not be reached. */
+ return SCAN_ERROR;
+}
+
+tree_t *get_parsed_root( struct pda_run *pda_run, int stop )
+{
+ if ( pda_run->parse_error )
+ return 0;
+ else if ( stop ) {
+ if ( pda_run->stack_top->shadow != 0 )
+ return pda_run->stack_top->shadow->tree;
+ }
+ else {
+ if ( pda_run->stack_top->next->shadow != 0 )
+ return pda_run->stack_top->next->shadow->tree;
+ }
+ return 0;
+}
+
+static void clear_parse_tree( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, parse_tree_t *pt )
+{
+ tree_t **top = vm_ptop();
+
+ if ( pt == 0 )
+ return;
+
+free_tree:
+ if ( pt->next != 0 ) {
+ vm_push_ptree( pt->next );
+ }
+
+ if ( pt->left_ignore != 0 ) {
+ vm_push_ptree( pt->left_ignore );
+ }
+
+ if ( pt->child != 0 ) {
+ vm_push_ptree( pt->child );
+ }
+
+ if ( pt->right_ignore != 0 ) {
+ vm_push_ptree( pt->right_ignore );
+ }
+
+ if ( pt->shadow != 0 ) {
+ colm_tree_downref( prg, sp, pt->shadow->tree );
+ kid_free( prg, pt->shadow );
+ }
+
+ parse_tree_free( pda_run, pt );
+
+ /* Any trees to downref? */
+ if ( sp != top ) {
+ pt = vm_pop_ptree();
+ goto free_tree;
+ }
+}
+
+void colm_pda_clear( program_t *prg, tree_t **sp, struct pda_run *pda_run )
+{
+ clear_fsm_run( prg, pda_run );
+
+ /* Remaining stack and parse trees underneath. */
+ clear_parse_tree( prg, sp, pda_run, pda_run->stack_top );
+ pda_run->stack_top = 0;
+
+ /* Traverse the token list downreffing. */
+ ref_t *ref = pda_run->token_list;
+ while ( ref != 0 ) {
+ ref_t *next = ref->next;
+ kid_free( prg, (kid_t*)ref );
+ ref = next;
+ }
+ pda_run->token_list = 0;
+
+ /* Traverse the btPoint list downreffing */
+ kid_t *btp = pda_run->bt_point;
+ while ( btp != 0 ) {
+ kid_t *next = btp->next;
+ colm_tree_downref( prg, sp, btp->tree );
+ kid_free( prg, (kid_t*)btp );
+ btp = next;
+ }
+ pda_run->bt_point = 0;
+
+ /* Clear out any remaining ignores. */
+ clear_parse_tree( prg, sp, pda_run, pda_run->accum_ignore );
+ pda_run->accum_ignore = 0;
+
+ /* Clear the input list (scanned tokes, sent trees). */
+ clear_parse_tree( prg, sp, pda_run, pda_run->parse_input );
+ pda_run->parse_input = 0;
+
+ colm_rcode_downref_all( prg, sp, &pda_run->reverse_code );
+ colm_rt_code_vect_empty( &pda_run->reverse_code );
+ colm_rt_code_vect_empty( &pda_run->rcode_collect );
+
+ colm_tree_downref( prg, sp, pda_run->parse_error_text );
+
+ if ( pda_run->reducer ) {
+ long local_lost = pool_alloc_num_lost( &pda_run->local_pool );
+
+ if ( local_lost )
+ message( "warning: reducer local lost parse trees: %ld\n", local_lost );
+ pool_alloc_clear( &pda_run->local_pool );
+ }
+}
+
+void colm_pda_init( program_t *prg, struct pda_run *pda_run, struct pda_tables *tables,
+ int parser_id, long stop_target, int revert_on, struct_t *context, int reducer )
+{
+ memset( pda_run, 0, sizeof(struct pda_run) );
+
+ pda_run->pda_tables = tables;
+ pda_run->parser_id = parser_id;
+ pda_run->stop_target = stop_target;
+ pda_run->revert_on = revert_on;
+ pda_run->target_steps = -1;
+ pda_run->reducer = reducer;
+
+ /* An initial commit shift count of -1 means we won't ever back up to zero
+ * shifts and think parsing cannot continue. */
+ pda_run->shift_count = 0;
+ pda_run->commit_shift_count = -1;
+
+ if ( reducer ) {
+ init_pool_alloc( &pda_run->local_pool, sizeof(parse_tree_t) +
+ prg->rtd->commit_union_sz(reducer) );
+ pda_run->parse_tree_pool = &pda_run->local_pool;
+ }
+ else {
+ pda_run->parse_tree_pool = &prg->parse_tree_pool;
+ }
+
+ debug( prg, REALM_PARSE, "initializing struct pda_run %s\n",
+ prg->rtd->lel_info[prg->rtd->parser_lel_ids[parser_id]].name );
+
+ /* FIXME: need the right one here. */
+ pda_run->pda_cs = prg->rtd->start_states[pda_run->parser_id];
+
+ kid_t *sentinal = kid_allocate( prg );
+ sentinal->tree = tree_allocate( prg );
+ sentinal->tree->refs = 1;
+
+ /* Init the element allocation variables. */
+ pda_run->stack_top = parse_tree_allocate( pda_run );
+ pda_run->stack_top->state = -1;
+ pda_run->stack_top->shadow = sentinal;
+
+ pda_run->num_retry = 0;
+ pda_run->next_region_ind = pda_run->pda_tables->token_region_inds[pda_run->pda_cs];
+ pda_run->stop_parsing = false;
+ pda_run->accum_ignore = 0;
+ pda_run->bt_point = 0;
+ pda_run->check_next = false;
+ pda_run->check_stop = false;
+
+ prg->rtd->init_bindings( pda_run );
+
+ init_rt_code_vect( &pda_run->reverse_code );
+ init_rt_code_vect( &pda_run->rcode_collect );
+
+ pda_run->context = context;
+ pda_run->parse_error = 0;
+ pda_run->parse_input = 0;
+ pda_run->trigger_undo = 0;
+
+ pda_run->token_id = 0;
+
+ pda_run->on_deck = false;
+ pda_run->parsed = 0;
+ pda_run->reject = false;
+
+ pda_run->rc_block_count = 0;
+ pda_run->eof_term_recvd = 0;
+
+ init_fsm_run( prg, pda_run );
+ new_token( prg, pda_run );
+}
+
+static long stack_top_target( program_t *prg, struct pda_run *pda_run )
+{
+ long state;
+ if ( pda_run->stack_top->state < 0 )
+ state = prg->rtd->start_states[pda_run->parser_id];
+ else {
+ unsigned shift = pda_run->stack_top->id -
+ pda_run->pda_tables->keys[pda_run->stack_top->state<<1];
+ unsigned offset = pda_run->pda_tables->offsets[pda_run->stack_top->state] + shift;
+ int index = pda_run->pda_tables->indices[offset];
+ state = pda_run->pda_tables->targs[index];
+ }
+ return state;
+}
+
+/*
+ * shift: retry goes into lower of shifted node.
+ * reduce: retry goes into upper of reduced node.
+ * shift-reduce: cannot be a retry
+ */
+
+/* Stops on:
+ * PCR_REDUCTION
+ * PCR_REVERSE
+ */
+static long parse_token( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, struct input_impl *is, long entry )
+{
+ int pos;
+ unsigned int *action;
+ int rhs_len;
+ int owner;
+ int induce_reject;
+ int ind_pos;
+
+ /* COROUTINE */
+ switch ( entry ) {
+ case PCR_START:
+
+ /* The scanner will send a null token if it can't find a token. */
+ if ( pda_run->parse_input == 0 )
+ goto parse_error;
+
+ /* This will cause parseInput to be lost. This
+ * path should be traced. */
+ if ( pda_run->pda_cs < 0 )
+ return PCR_DONE;
+
+ /* Record the state in the parse tree. */
+ pda_run->parse_input->state = pda_run->pda_cs;
+
+again:
+ if ( pda_run->parse_input == 0 )
+ goto _out;
+
+ pda_run->lel = pda_run->parse_input;
+ pda_run->cur_state = pda_run->pda_cs;
+
+ if ( pda_run->lel->id < pda_run->pda_tables->keys[pda_run->cur_state<<1] ||
+ pda_run->lel->id > pda_run->pda_tables->keys[(pda_run->cur_state<<1)+1] )
+ {
+ debug( prg, REALM_PARSE, "parse error, no transition 1\n" );
+ push_bt_point( prg, pda_run );
+ goto parse_error;
+ }
+
+ ind_pos = pda_run->pda_tables->offsets[pda_run->cur_state] +
+ (pda_run->lel->id - pda_run->pda_tables->keys[pda_run->cur_state<<1]);
+
+ owner = pda_run->pda_tables->owners[ind_pos];
+ if ( owner != pda_run->cur_state ) {
+ debug( prg, REALM_PARSE, "parse error, no transition 2\n" );
+ push_bt_point( prg, pda_run );
+ goto parse_error;
+ }
+
+ pos = pda_run->pda_tables->indices[ind_pos];
+ if ( pos < 0 ) {
+ debug( prg, REALM_PARSE, "parse error, no transition 3\n" );
+ push_bt_point( prg, pda_run );
+ goto parse_error;
+ }
+
+ /* Checking complete. */
+
+ induce_reject = false;
+ pda_run->pda_cs = pda_run->pda_tables->targs[pos];
+ action = pda_run->pda_tables->actions + pda_run->pda_tables->act_inds[pos];
+ if ( pda_run->lel->retry_lower )
+ action += pda_run->lel->retry_lower;
+
+ /*
+ * Shift
+ */
+
+ if ( *action & act_sb ) {
+ debug( prg, REALM_PARSE, "shifted: %s\n",
+ prg->rtd->lel_info[pda_run->lel->id].name );
+ /* Consume. */
+ pda_run->parse_input = pda_run->parse_input->next;
+
+ pda_run->lel->state = pda_run->cur_state;
+
+ /* If its a token then attach ignores and record it in the token list
+ * of the next ignore attachment to use. */
+ if ( pda_run->lel->id < prg->rtd->first_non_term_id ) {
+ if ( pda_run->lel->cause_reduce == 0 )
+ attach_right_ignore( prg, sp, pda_run, pda_run->stack_top );
+ }
+
+ pda_run->lel->next = pda_run->stack_top;
+ pda_run->stack_top = pda_run->lel;
+
+ /* If its a token then attach ignores and record it in the token list
+ * of the next ignore attachment to use. */
+ if ( pda_run->lel->id < prg->rtd->first_non_term_id ) {
+ attach_left_ignore( prg, sp, pda_run, pda_run->lel );
+
+ ref_t *ref = (ref_t*)kid_allocate( prg );
+ ref->kid = pda_run->lel->shadow;
+ //colm_tree_upref( prg, pdaRun->tree );
+ ref->next = pda_run->token_list;
+ pda_run->token_list = ref;
+ }
+
+ if ( action[1] == 0 )
+ pda_run->lel->retry_lower = 0;
+ else {
+ debug( prg, REALM_PARSE, "retry: %p\n", pda_run->stack_top );
+ pda_run->lel->retry_lower += 1;
+ assert( pda_run->lel->retry_upper == 0 );
+ /* FIXME: Has the retry already been counted? */
+ pda_run->num_retry += 1;
+ }
+
+ pda_run->shift_count += 1;
+ }
+
+ /*
+ * Commit
+ */
+
+ if ( pda_run->pda_tables->commit_len[pos] != 0 ) {
+ debug( prg, REALM_PARSE, "commit point\n" );
+ pda_run->commit_shift_count = pda_run->shift_count;
+
+ /* Not in a reverting context and the parser result is not used. */
+ if ( pda_run->reducer )
+ commit_reduce( prg, sp, pda_run );
+
+ if ( pda_run->fail_parsing )
+ goto fail;
+
+ }
+
+ /*
+ * Reduce
+ */
+
+ if ( *action & act_rb ) {
+ int r, object_length;
+ parse_tree_t *last, *child;
+ kid_t *attrs;
+ kid_t *data_last, *data_child;
+
+ /* If there was shift don't attach again. */
+ if ( !( *action & act_sb ) && pda_run->lel->id < prg->rtd->first_non_term_id )
+ attach_right_ignore( prg, sp, pda_run, pda_run->stack_top );
+
+ pda_run->reduction = *action >> 2;
+
+ if ( pda_run->parse_input != 0 )
+ pda_run->parse_input->cause_reduce += 1;
+
+ kid_t *value = kid_allocate( prg );
+ value->tree = tree_allocate( prg );
+ value->tree->refs = 1;
+ value->tree->id = prg->rtd->prod_info[pda_run->reduction].lhs_id;
+ value->tree->prod_num = prg->rtd->prod_info[pda_run->reduction].prod_num;
+
+ pda_run->red_lel = parse_tree_allocate( pda_run );
+ pda_run->red_lel->id = prg->rtd->prod_info[pda_run->reduction].lhs_id;
+ pda_run->red_lel->next = 0;
+ pda_run->red_lel->cause_reduce = 0;
+ pda_run->red_lel->retry_lower = 0;
+ pda_run->red_lel->shadow = value;
+
+ /* Transfer. */
+ pda_run->red_lel->retry_upper = pda_run->lel->retry_lower;
+ pda_run->lel->retry_lower = 0;
+
+ /* Allocate the attributes. */
+ object_length = prg->rtd->lel_info[pda_run->red_lel->id].object_length;
+ attrs = alloc_attrs( prg, object_length );
+
+ /* Build the list of children. We will be giving up a reference when we
+ * detach parse tree and data tree, but gaining the reference when we
+ * put the children under the new data tree. No need to alter refcounts
+ * here. */
+ rhs_len = prg->rtd->prod_info[pda_run->reduction].length;
+ child = last = 0;
+ data_child = data_last = 0;
+ for ( r = 0; r < rhs_len; r++ ) {
+
+ /* The child. */
+ child = pda_run->stack_top;
+ data_child = child->shadow;
+
+ /* Pop. */
+ pda_run->stack_top = pda_run->stack_top->next;
+
+ /* Detach the parse tree from the data. */
+ child->shadow = 0;
+
+ /* Reverse list. */
+ child->next = last;
+ data_child->next = data_last;
+
+ /* Track last for reversal. */
+ last = child;
+ data_last = data_child;
+ }
+
+ pda_run->red_lel->child = child;
+ pda_run->red_lel->shadow->tree->child = kid_list_concat( attrs, data_child );
+
+ debug( prg, REALM_PARSE, "reduced: %s rhsLen %d\n",
+ prg->rtd->prod_info[pda_run->reduction].name, rhs_len );
+ if ( action[1] == 0 )
+ pda_run->red_lel->retry_upper = 0;
+ else {
+ pda_run->red_lel->retry_upper += 1;
+ assert( pda_run->lel->retry_lower == 0 );
+ pda_run->num_retry += 1;
+ debug( prg, REALM_PARSE, "retry: %p\n", pda_run->red_lel );
+ }
+
+ /* When the production is of zero length we stay in the same state.
+ * Otherwise we use the state stored in the first child. */
+ pda_run->pda_cs = rhs_len == 0 ? pda_run->cur_state : child->state;
+
+ if ( prg->ctx_dep_parsing && prg->rtd->prod_info[pda_run->reduction].frame_id >= 0 ) {
+ /* Frame info for reduction. */
+ pda_run->fi = &prg->rtd->frame_info[prg->rtd->prod_info[pda_run->reduction].frame_id];
+ pda_run->frame_id = prg->rtd->prod_info[pda_run->reduction].frame_id;
+ pda_run->reject = false;
+ pda_run->parsed = 0;
+ pda_run->code = pda_run->fi->codeWV;
+
+ /* COROUTINE */
+ return PCR_REDUCTION;
+ case PCR_REDUCTION:
+
+ if ( prg->induce_exit )
+ goto fail;
+
+ /* If the lhs was stored and it changed then we need to restore the
+ * original upon backtracking, otherwise downref since we took a
+ * copy above. */
+ if ( pda_run->parsed != 0 ) {
+ if ( pda_run->parsed != pda_run->red_lel->shadow->tree ) {
+ debug( prg, REALM_PARSE, "lhs tree was modified, "
+ "adding a restore instruction\n" );
+//
+// /* Make it into a parse tree. */
+// tree_t *newPt = prepParseTree( prg, sp, pdaRun->redLel->tree );
+// colm_tree_downref( prg, sp, pdaRun->redLel->tree );
+//
+// /* Copy it in. */
+// pdaRun->redLel->tree = newPt;
+// colm_tree_upref( prg, pdaRun->redLel->tree );
+
+ /* Add the restore instruct. */
+ append_code_val( &pda_run->rcode_collect, IN_RESTORE_LHS );
+ append_word( &pda_run->rcode_collect, (word_t)pda_run->parsed );
+ append_code_val( &pda_run->rcode_collect, SIZEOF_CODE + SIZEOF_WORD );
+ }
+ else {
+ /* Not changed. Done with parsed. */
+ colm_tree_downref( prg, sp, pda_run->parsed );
+ }
+ pda_run->parsed = 0;
+ }
+
+ /* Pull out the reverse code, if any. */
+ colm_make_reverse_code( pda_run );
+ colm_transfer_reverse_code( pda_run, pda_run->red_lel );
+
+ /* Perhaps the execution environment is telling us we need to
+ * reject the reduction. */
+ induce_reject = pda_run->reject;
+ }
+
+ /* If the left hand side was replaced then the only parse algorithm
+ * data that is contained in it will the PF_HAS_RCODE flag. Everthing
+ * else will be in the original. This requires that we restore first
+ * when going backwards and when doing a commit. */
+
+ if ( induce_reject ) {
+ debug( prg, REALM_PARSE, "error induced during reduction of %s\n",
+ prg->rtd->lel_info[pda_run->red_lel->id].name );
+ pda_run->red_lel->state = pda_run->cur_state;
+ pda_run->red_lel->next = pda_run->stack_top;
+ pda_run->stack_top = pda_run->red_lel;
+ /* FIXME: What is the right argument here? */
+ push_bt_point( prg, pda_run );
+ goto parse_error;
+ }
+
+ pda_run->red_lel->next = pda_run->parse_input;
+ pda_run->parse_input = pda_run->red_lel;
+ }
+
+ goto again;
+
+parse_error:
+ debug( prg, REALM_PARSE, "hit error, backtracking\n" );
+
+#if 0
+ if ( pda_run->num_retry == 0 ) {
+ debug( prg, REALM_PARSE, "out of retries failing parse\n" );
+ goto fail;
+ }
+#endif
+
+ while ( 1 ) {
+ if ( pda_run->on_deck ) {
+ debug( prg, REALM_BYTECODE, "dropping out for reverse code call\n" );
+
+ pda_run->frame_id = -1;
+ pda_run->code = colm_pop_reverse_code( &pda_run->reverse_code );
+
+ /* COROUTINE */
+ return PCR_REVERSE;
+ case PCR_REVERSE:
+
+ colm_decrement_steps( pda_run );
+ }
+ else if ( pda_run->check_next ) {
+ pda_run->check_next = false;
+
+ if ( pda_run->next > 0 && pda_run->pda_tables->token_regions[pda_run->next] != 0 ) {
+ debug( prg, REALM_PARSE, "found a new region\n" );
+ pda_run->num_retry -= 1;
+ pda_run->pda_cs = stack_top_target( prg, pda_run );
+ pda_run->next_region_ind = pda_run->next;
+ return PCR_DONE;
+ }
+ }
+ else if ( pda_run->check_stop ) {
+ pda_run->check_stop = false;
+
+ if ( pda_run->stop ) {
+ debug( prg, REALM_PARSE, "stopping the backtracking, "
+ "steps is %d\n", pda_run->steps );
+
+ pda_run->pda_cs = stack_top_target( prg, pda_run );
+ goto _out;
+ }
+ }
+ else if ( pda_run->parse_input != 0 ) {
+ /* Either we are dealing with a terminal that was shifted or a
+ * nonterminal that was reduced. */
+ if ( pda_run->parse_input->id < prg->rtd->first_non_term_id ) {
+ /* This is a terminal. */
+ assert( pda_run->parse_input->retry_upper == 0 );
+
+ if ( pda_run->parse_input->retry_lower != 0 ) {
+ debug( prg, REALM_PARSE, "found retry targ: %p\n", pda_run->parse_input );
+
+ pda_run->num_retry -= 1;
+ pda_run->pda_cs = pda_run->parse_input->state;
+ goto again;
+ }
+
+ if ( pda_run->parse_input->cause_reduce != 0 ) {
+ /* The terminal caused a reduce. Unshift the reduced thing
+ * (will unreduce in the next step. */
+ if ( pda_run->shift_count == pda_run->commit_shift_count ) {
+ debug( prg, REALM_PARSE, "backed up to commit point, "
+ "failing parse\n" );
+ goto fail;
+ }
+ pda_run->shift_count -= 1;
+
+ pda_run->undo_lel = pda_run->stack_top;
+
+ /* Check if we've arrived at the stack sentinal. This guard
+ * is here to allow us to initially set numRetry to one to
+ * cause the parser to backup all the way to the beginning
+ * when an error occurs. */
+ if ( pda_run->undo_lel->next == 0 )
+ break;
+
+ /* Either we are dealing with a terminal that was
+ * shifted or a nonterminal that was reduced. */
+ assert( !(pda_run->stack_top->id < prg->rtd->first_non_term_id) );
+
+ debug( prg, REALM_PARSE, "backing up over non-terminal: %s\n",
+ prg->rtd->lel_info[pda_run->stack_top->id].name );
+
+ /* Pop the item from the stack. */
+ pda_run->stack_top = pda_run->stack_top->next;
+
+ /* Queue it as next parseInput item. */
+ pda_run->undo_lel->next = pda_run->parse_input;
+ pda_run->parse_input = pda_run->undo_lel;
+ }
+ else {
+ long region = pda_run->parse_input->retry_region;
+ pda_run->next = region > 0 ? region + 1 : 0;
+ pda_run->check_next = true;
+ pda_run->check_stop = true;
+
+ send_back( prg, sp, pda_run, is, pda_run->parse_input );
+
+ pda_run->parse_input = 0;
+ }
+ }
+ else if ( pda_run->parse_input->flags & PF_HAS_RCODE ) {
+ debug( prg, REALM_PARSE, "tree has rcode, setting on deck\n" );
+ pda_run->on_deck = true;
+ pda_run->parsed = 0;
+
+ /* Only the RCODE flag was in the replaced lhs. All the rest is in
+ * the the original. We read it after restoring. */
+
+ pda_run->parse_input->flags &= ~PF_HAS_RCODE;
+ }
+ else {
+ /* Remove it from the input queue. */
+ pda_run->undo_lel = pda_run->parse_input;
+ pda_run->parse_input = pda_run->parse_input->next;
+
+ /* Extract children from the child list. */
+ parse_tree_t *first = pda_run->undo_lel->child;
+ pda_run->undo_lel->child = 0;
+
+ /* This will skip the ignores/attributes, etc. */
+ kid_t *data_first = tree_extract_child( prg, pda_run->undo_lel->shadow->tree );
+
+ /* Walk the child list and and push the items onto the parsing
+ * stack one at a time. */
+ while ( first != 0 ) {
+ /* Get the next item ahead of time. */
+ parse_tree_t *next = first->next;
+ kid_t *data_next = data_first->next;
+
+ /* Push onto the stack. */
+ first->next = pda_run->stack_top;
+ pda_run->stack_top = first;
+
+ /* Reattach the data and the parse tree. */
+ first->shadow = data_first;
+
+ first = next;
+ data_first = data_next;
+ }
+
+ /* If there is an parseInput queued, this is one less reduction it has
+ * caused. */
+ if ( pda_run->parse_input != 0 )
+ pda_run->parse_input->cause_reduce -= 1;
+
+ if ( pda_run->undo_lel->retry_upper != 0 ) {
+ /* There is always an parseInput item here because reduce
+ * conflicts only happen on a lookahead character. */
+ assert( pda_run->parse_input != pda_run->undo_lel );
+ assert( pda_run->parse_input != 0 );
+ assert( pda_run->undo_lel->retry_lower == 0 );
+ assert( pda_run->parse_input->retry_upper == 0 );
+
+ /* Transfer the retry from undoLel to parseInput. */
+ pda_run->parse_input->retry_lower = pda_run->undo_lel->retry_upper;
+ pda_run->parse_input->retry_upper = 0;
+ pda_run->parse_input->state = stack_top_target( prg, pda_run );
+ }
+
+ /* Free the reduced item. */
+ colm_tree_downref( prg, sp, pda_run->undo_lel->shadow->tree );
+ kid_free( prg, pda_run->undo_lel->shadow );
+ parse_tree_free( pda_run, pda_run->undo_lel );
+
+ /* If the stacktop had right ignore attached, detach now. */
+ if ( pda_run->stack_top->flags & PF_RIGHT_IL_ATTACHED )
+ detach_right_ignore( prg, sp, pda_run, pda_run->stack_top );
+ }
+ }
+ else if ( pda_run->accum_ignore != 0 ) {
+ debug( prg, REALM_PARSE, "have accumulated ignore to undo\n" );
+
+ /* Send back any accumulated ignore tokens, then trigger error
+ * in the the parser. */
+ parse_tree_t *ignore = pda_run->accum_ignore;
+ pda_run->accum_ignore = pda_run->accum_ignore->next;
+ ignore->next = 0;
+
+ long region = ignore->retry_region;
+ pda_run->next = region > 0 ? region + 1 : 0;
+ pda_run->check_next = true;
+ pda_run->check_stop = true;
+
+ send_back_ignore( prg, sp, pda_run, is, ignore );
+
+ colm_tree_downref( prg, sp, ignore->shadow->tree );
+ kid_free( prg, ignore->shadow );
+ parse_tree_free( pda_run, ignore );
+ }
+ else {
+ if ( pda_run->shift_count == pda_run->commit_shift_count ) {
+ debug( prg, REALM_PARSE, "backed up to commit point, failing parse\n" );
+ goto fail;
+ }
+
+ pda_run->shift_count -= 1;
+
+ /* Now it is time to undo something. Pick an element from the top of
+ * the stack. */
+ pda_run->undo_lel = pda_run->stack_top;
+
+ /* Check if we've arrived at the stack sentinal. This guard is
+ * here to allow us to initially set numRetry to one to cause the
+ * parser to backup all the way to the beginning when an error
+ * occurs. */
+ if ( pda_run->undo_lel->next == 0 )
+ break;
+
+ /* Either we are dealing with a terminal that was
+ * shifted or a nonterminal that was reduced. */
+ if ( pda_run->stack_top->id < prg->rtd->first_non_term_id ) {
+ debug( prg, REALM_PARSE, "backing up over effective terminal: %s\n",
+ prg->rtd->lel_info[pda_run->stack_top->id].name );
+
+ /* Pop the item from the stack. */
+ pda_run->stack_top = pda_run->stack_top->next;
+
+ /* Queue it as next parseInput item. */
+ pda_run->undo_lel->next = pda_run->parse_input;
+ pda_run->parse_input = pda_run->undo_lel;
+
+ /* Pop from the token list. */
+ ref_t *ref = pda_run->token_list;
+ pda_run->token_list = ref->next;
+ kid_free( prg, (kid_t*)ref );
+
+ assert( pda_run->accum_ignore == 0 );
+ detach_left_ignore( prg, sp, pda_run, pda_run->parse_input );
+ }
+ else {
+ debug( prg, REALM_PARSE, "backing up over non-terminal: %s\n",
+ prg->rtd->lel_info[pda_run->stack_top->id].name );
+
+ /* Pop the item from the stack. */
+ pda_run->stack_top = pda_run->stack_top->next;
+
+ /* Queue it as next parseInput item. */
+ pda_run->undo_lel->next = pda_run->parse_input;
+ pda_run->parse_input = pda_run->undo_lel;
+ }
+
+ /* Undo attach of right ignore. */
+ if ( pda_run->stack_top->flags & PF_RIGHT_IL_ATTACHED )
+ detach_right_ignore( prg, sp, pda_run, pda_run->stack_top );
+ }
+ }
+
+fail:
+ pda_run->pda_cs = -1;
+ pda_run->parse_error = 1;
+
+ /* FIXME: do we still need to fall through here? A fail is permanent now,
+ * no longer called into again. */
+
+ return PCR_DONE;
+
+_out:
+ pda_run->next_region_ind = pda_run->pda_tables->token_region_inds[pda_run->pda_cs];
+
+ /* COROUTINE */
+ case PCR_DONE:
+ break; }
+
+ return PCR_DONE;
+}
+
+/*
+ * colm_parse_loop
+ *
+ * Stops on:
+ * PCR_PRE_EOF
+ * PCR_GENERATION
+ * PCR_REDUCTION
+ * PCR_REVERSE
+ */
+
+long colm_parse_loop( program_t *prg, tree_t **sp, struct pda_run *pda_run,
+ struct input_impl *is, long entry )
+{
+ struct lang_el_info *lel_info = prg->rtd->lel_info;
+
+ /* COROUTINE */
+ switch ( entry ) {
+ case PCR_START:
+
+ pda_run->stop = false;
+
+ while ( true ) {
+ debug( prg, REALM_PARSE, "parse loop start\n" );
+
+ /* Pull the current scanner from the parser. This can change during
+ * parsing due to inputStream pushes, usually for the purpose of includes.
+ * */
+ pda_run->token_id = scan_token( prg, pda_run, is );
+
+ if ( pda_run->token_id == SCAN_ERROR ) {
+ if ( pda_run->pre_region >= 0 ) {
+ pda_run->pre_region = -1;
+ pda_run->fsm_cs = pda_run->next_cs;
+ pda_run->tokpref = 0;
+ continue;
+ }
+ }
+
+ if ( pda_run->token_id == SCAN_ERROR &&
+ ( prg->rtd->region_info[pda_run->region].ci_lel_id > 0 ) )
+ {
+ debug( prg, REALM_PARSE, "sending a collect ignore\n" );
+ send_collect_ignore( prg, sp, pda_run, is,
+ prg->rtd->region_info[pda_run->region].ci_lel_id );
+ goto yes;
+ }
+
+ if ( pda_run->token_id == SCAN_TRY_AGAIN_LATER ) {
+ debug( prg, REALM_PARSE, "scanner says try again later\n" );
+ break;
+ }
+
+ assert( pda_run->parse_input == 0 );
+ pda_run->parse_input = 0;
+
+ /* Check for EOF. */
+ if ( pda_run->token_id == SCAN_EOF ) {
+ pda_run->eof_term_recvd = true;
+ send_eof( prg, sp, pda_run, is );
+
+ pda_run->frame_id = prg->rtd->region_info[pda_run->region].eof_frame_id;
+
+ if ( prg->ctx_dep_parsing && pda_run->frame_id >= 0 ) {
+ debug( prg, REALM_PARSE, "HAVE PRE_EOF BLOCK\n" );
+
+ pda_run->fi = &prg->rtd->frame_info[pda_run->frame_id];
+ pda_run->code = pda_run->fi->codeWV;
+
+ /* COROUTINE */
+ return PCR_PRE_EOF;
+ case PCR_PRE_EOF:
+
+ colm_make_reverse_code( pda_run );
+ }
+ }
+ else if ( pda_run->token_id == SCAN_UNDO ) {
+ /* Fall through with parseInput = 0. FIXME: Do we need to send back ignore? */
+ debug( prg, REALM_PARSE, "invoking undo from the scanner\n" );
+ }
+ else if ( pda_run->token_id == SCAN_ERROR ) {
+ /* Scanner error, maybe retry. */
+ if ( pda_run->accum_ignore == 0 && get_next_region( pda_run, 1 ) != 0 ) {
+ debug( prg, REALM_PARSE, "scanner failed, trying next region\n" );
+
+ pda_run->next_region_ind += 1;
+ goto skip_send;
+ }
+ else { // if ( pdaRun->numRetry > 0 ) {
+ debug( prg, REALM_PARSE, "invoking parse error from the scanner\n" );
+
+ /* Fall through to send null (error). */
+ push_bt_point( prg, pda_run );
+ }
+#if 0
+ else {
+ debug( prg, REALM_PARSE, "no alternate scanning regions\n" );
+
+ /* There are no alternative scanning regions to try, nor are
+ * there any alternatives stored in the current parse tree. No
+ * choice but to end the parse. */
+ push_bt_point( prg, pda_run );
+
+ report_parse_error( prg, sp, pda_run );
+ pda_run->parse_error = 1;
+ goto skip_send;
+ }
+#endif
+ }
+ else if ( pda_run->token_id == SCAN_LANG_EL ) {
+ debug( prg, REALM_PARSE, "sending an named lang el\n" );
+
+ /* A named language element (parsing colm program). */
+ prg->rtd->send_named_lang_el( prg, sp, pda_run, is );
+ }
+ else if ( pda_run->token_id == SCAN_TREE ) {
+ debug( prg, REALM_PARSE, "sending a tree\n" );
+
+ /* A tree already built. */
+ send_tree( prg, sp, pda_run, is );
+ }
+ else if ( pda_run->token_id == SCAN_IGNORE ) {
+ debug( prg, REALM_PARSE, "sending an ignore token\n" );
+
+ /* A tree to ignore. */
+ send_ignore_tree( prg, sp, pda_run, is );
+ goto skip_send;
+ }
+ else if ( prg->ctx_dep_parsing && lel_info[pda_run->token_id].frame_id >= 0 ) {
+ /* Has a generation action. */
+ debug( prg, REALM_PARSE, "token gen action: %s\n",
+ prg->rtd->lel_info[pda_run->token_id].name );
+
+ /* Make the token data. */
+ pda_run->tokdata = peek_match( prg, pda_run, is );
+
+ /* Note that we don't update the position now. It is done when the token
+ * data is pulled from the inputStream. */
+
+ pda_run->p = pda_run->pe = 0;
+ pda_run->tokpref = 0;
+ pda_run->scan_eof = 0;
+
+ pda_run->fi = &prg->rtd->frame_info[prg->rtd->lel_info[pda_run->token_id].frame_id];
+ pda_run->frame_id = prg->rtd->lel_info[pda_run->token_id].frame_id;
+ pda_run->code = pda_run->fi->codeWV;
+
+ /* COROUTINE */
+ return PCR_GENERATION;
+ case PCR_GENERATION:
+
+ colm_make_reverse_code( pda_run );
+
+ /* Finished with the match text. */
+ string_free( prg, pda_run->tokdata );
+
+ goto skip_send;
+ }
+ else if ( lel_info[pda_run->token_id].ignore ) {
+ debug( prg, REALM_PARSE, "sending an ignore token: %s\n",
+ prg->rtd->lel_info[pda_run->token_id].name );
+
+ /* Is an ignore token. */
+ send_ignore( prg, sp, pda_run, is, pda_run->token_id );
+ goto skip_send;
+ }
+ else {
+ debug( prg, REALM_PARSE, "sending a plain old token: %s\n",
+ prg->rtd->lel_info[pda_run->token_id].name );
+
+ /* Is a plain token. */
+ send_token( prg, sp, pda_run, is, pda_run->token_id );
+ }
+yes:
+
+ if ( pda_run->parse_input != 0 )
+ colm_transfer_reverse_code( pda_run, pda_run->parse_input );
+
+ if ( pda_run->parse_input != 0 ) {
+ /* If it's a nonterminal with a termdup then flip the parse tree to
+ * the terminal. */
+ if ( pda_run->parse_input->id >= prg->rtd->first_non_term_id ) {
+ pda_run->parse_input->id =
+ prg->rtd->lel_info[pda_run->parse_input->id].term_dup_id;
+ pda_run->parse_input->flags |= PF_TERM_DUP;
+ }
+ }
+
+ long pcr = parse_token( prg, sp, pda_run, is, PCR_START );
+
+ while ( pcr != PCR_DONE ) {
+
+ /* COROUTINE */
+ return pcr;
+ case PCR_REDUCTION:
+ case PCR_REVERSE:
+
+ pcr = parse_token( prg, sp, pda_run, is, entry );
+ }
+
+ assert( pcr == PCR_DONE );
+
+ handle_error( prg, sp, pda_run );
+
+skip_send:
+ new_token( prg, pda_run );
+
+ /* Various stop conditions. This should all be coverned by one test
+ * eventually. */
+
+ if ( pda_run->trigger_undo ) {
+ debug( prg, REALM_PARSE, "parsing stopped by triggerUndo\n" );
+ break;
+ }
+
+ if ( pda_run->eof_term_recvd ) {
+ debug( prg, REALM_PARSE, "parsing stopped by EOF\n" );
+ break;
+ }
+
+ if ( pda_run->stop_parsing ) {
+ debug( prg, REALM_PARSE, "scanner has been stopped\n" );
+ break;
+ }
+
+ if ( pda_run->stop ) {
+ debug( prg, REALM_PARSE, "parsing has been stopped by consumedCount\n" );
+ break;
+ }
+
+ if ( prg->induce_exit ) {
+ debug( prg, REALM_PARSE, "parsing has been stopped by a call to exit\n" );
+ break;
+ }
+
+ if ( pda_run->parse_error ) {
+ debug( prg, REALM_PARSE, "parsing stopped by a parse error\n" );
+ break;
+ }
+
+ /* Disregard any alternate parse paths, just go right to failure. */
+ if ( pda_run->fail_parsing ) {
+ debug( prg, REALM_PARSE, "parsing failed by explicit request\n" );
+ break;
+ }
+ }
+
+ /* COROUTINE */
+ case PCR_DONE:
+ break; }
+
+ return PCR_DONE;
+}
+
+
+long colm_parse_frag( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, input_t *input, long entry )
+{
+ /* COROUTINE */
+ switch ( entry ) {
+ case PCR_START:
+
+ if ( ! pda_run->parse_error ) {
+ long pcr = colm_parse_loop( prg, sp, pda_run,
+ input_to_impl( input ), entry );
+
+ while ( pcr != PCR_DONE ) {
+
+ /* COROUTINE */
+ return pcr;
+ case PCR_REDUCTION:
+ case PCR_GENERATION:
+ case PCR_PRE_EOF:
+ case PCR_REVERSE:
+
+ pcr = colm_parse_loop( prg, sp, pda_run,
+ input_to_impl( input ), entry );
+ }
+ }
+
+ /* COROUTINE */
+ case PCR_DONE:
+ break; }
+
+ return PCR_DONE;
+}
+
+long colm_parse_undo_frag( program_t *prg, tree_t **sp, struct pda_run *pda_run,
+ input_t *input, long entry, long steps )
+{
+ debug( prg, REALM_PARSE,
+ "undo parse frag, target steps: %ld, pdarun steps: %ld\n",
+ steps, pda_run->steps );
+
+ reset_token( pda_run );
+
+ /* COROUTINE */
+ switch ( entry ) {
+ case PCR_START:
+
+ if ( steps < pda_run->steps ) {
+ /* Setup environment for going backwards until we reduced steps to
+ * what we want. */
+ pda_run->num_retry += 1;
+ pda_run->target_steps = steps;
+ pda_run->trigger_undo = 1;
+
+ /* The parse loop will recognise the situation. */
+ long pcr = colm_parse_loop( prg, sp, pda_run, input_to_impl(input), entry );
+ while ( pcr != PCR_DONE ) {
+
+ /* COROUTINE */
+ return pcr;
+ case PCR_REDUCTION:
+ case PCR_GENERATION:
+ case PCR_PRE_EOF:
+ case PCR_REVERSE:
+
+ pcr = colm_parse_loop( prg, sp, pda_run, input_to_impl(input), entry );
+ }
+
+ /* Reset environment. */
+ pda_run->trigger_undo = 0;
+ pda_run->target_steps = -1;
+ pda_run->num_retry -= 1;
+ }
+
+ /* COROUTINE */
+ case PCR_DONE:
+ break; }
+
+ return PCR_DONE;
+}
+
+void colm_parse_reduce_commit( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run )
+{
+ /* Flush out anything not committed. */
+ if ( pda_run->reducer )
+ commit_reduce( prg, sp, pda_run );
+}
+