summaryrefslogtreecommitdiff
path: root/src/pdarun.h
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@colm.net>2020-03-14 15:29:52 +0200
committerAdrian Thurston <thurston@colm.net>2020-03-14 15:29:52 +0200
commitf653735830d537715f2885bd832cf04851d35401 (patch)
tree95e6551e39407543366d4f49aedf7b78c6e8bbe1 /src/pdarun.h
parentbcc54d5df10cf425e7134b06f70d7ffe1abee4e4 (diff)
downloadcolm-f653735830d537715f2885bd832cf04851d35401.tar.gz
moved source files into commit repository
Diffstat (limited to 'src/pdarun.h')
-rw-r--r--src/pdarun.h471
1 files changed, 471 insertions, 0 deletions
diff --git a/src/pdarun.h b/src/pdarun.h
new file mode 100644
index 00000000..4003b9be
--- /dev/null
+++ b/src/pdarun.h
@@ -0,0 +1,471 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _COLM_PDARUN_H
+#define _COLM_PDARUN_H
+
+#include <colm/input.h>
+#include <colm/defs.h>
+#include <colm/tree.h>
+#include <colm/struct.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct colm_program;
+
+#define MARK_SLOTS 32
+
+struct fsm_tables
+{
+ long *actions;
+ long *key_offsets;
+ char *trans_keys;
+ long *single_lengths;
+ long *range_lengths;
+ long *index_offsets;
+ long *transTargsWI;
+ long *transActionsWI;
+ long *to_state_actions;
+ long *from_state_actions;
+ long *eof_actions;
+ long *eof_targs;
+ long *entry_by_region;
+
+ long num_states;
+ long num_actions;
+ long num_trans_keys;
+ long num_single_lengths;
+ long num_range_lengths;
+ long num_index_offsets;
+ long numTransTargsWI;
+ long numTransActionsWI;
+ long num_regions;
+
+ long start_state;
+ long first_final;
+ long error_state;
+
+ struct GenAction **action_switch;
+ long num_action_switch;
+};
+
+#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8
+ #error "SIZEOF_LONG contained an unexpected value"
+#endif
+
+struct colm_execution;
+
+struct rt_code_vect
+{
+ code_t *data;
+ long tab_len;
+ long alloc_len;
+
+ /* FIXME: leak when freed. */
+};
+
+void list_add_after( list_t *list, list_el_t *prev_el, list_el_t *new_el );
+void list_add_before( list_t *list, list_el_t *next_el, list_el_t *new_el );
+
+void list_prepend( list_t *list, list_el_t *new_el );
+void list_append( list_t *list, list_el_t *new_el );
+
+list_el_t *list_detach( list_t *list, list_el_t *el );
+list_el_t *list_detach_first(list_t *list );
+list_el_t *list_detach_last(list_t *list );
+
+long list_length(list_t *list);
+
+struct function_info
+{
+ long frame_id;
+ long arg_size;
+ long frame_size;
+};
+
+/*
+ * Program Data.
+ */
+
+struct pat_cons_info
+{
+ long offset;
+ long num_bindings;
+};
+
+struct pat_cons_node
+{
+ long id;
+ long prod_num;
+ long next;
+ long child;
+ long bind_id;
+ const char *data;
+ long length;
+ long left_ignore;
+ long right_ignore;
+
+ /* Just match nonterminal, don't go inside. */
+ unsigned char stop;
+};
+
+/* FIXME: should have a descriptor for object types to give the length. */
+
+struct lang_el_info
+{
+ const char *name;
+ const char *xml_tag;
+ unsigned char repeat;
+ unsigned char list;
+ unsigned char literal;
+ unsigned char ignore;
+
+ long frame_id;
+
+ long object_type_id;
+ long ofi_offset;
+ long object_length;
+
+ long term_dup_id;
+ long mark_id;
+ long capture_attr;
+ long num_capture_attr;
+};
+
+struct struct_el_info
+{
+ long size;
+ short *trees;
+ long trees_len;
+};
+
+struct prod_info
+{
+ unsigned long lhs_id;
+ short prod_num;
+ long length;
+ const char *name;
+ long frame_id;
+ unsigned char lhs_upref;
+ unsigned char *copy;
+ long copy_len;
+};
+
+/* Must match the LocalType enum. */
+#define LI_Tree 1
+#define LI_Iter 2
+#define LI_RevIter 3
+#define LI_UserIter 4
+
+struct local_info
+{
+ char type;
+ short offset;
+};
+
+struct frame_info
+{
+ const char *name;
+ code_t *codeWV;
+ long codeLenWV;
+ code_t *codeWC;
+ long codeLenWC;
+ struct local_info *locals;
+ long locals_len;
+ long arg_size;
+ long frame_size;
+ char ret_tree;
+};
+
+struct region_info
+{
+ long default_token;
+ long eof_frame_id;
+ int ci_lel_id;
+};
+
+typedef struct _CaptureAttr
+{
+ long mark_enter;
+ long mark_leave;
+ long offset;
+} CaptureAttr;
+
+struct pda_tables
+{
+ /* Parser table data. */
+ int *indices;
+ int *owners;
+ int *keys;
+ unsigned int *offsets;
+ unsigned int *targs;
+ unsigned int *act_inds;
+ unsigned int *actions;
+ int *commit_len;
+ int *token_region_inds;
+ int *token_regions;
+ int *token_pre_regions;
+
+ int num_indices;
+ int num_keys;
+ int num_states;
+ int num_targs;
+ int num_act_inds;
+ int num_actions;
+ int num_commit_len;
+ int num_region_items;
+ int num_pre_region_items;
+};
+
+struct pool_block
+{
+ void *data;
+ struct pool_block *next;
+};
+
+struct pool_item
+{
+ struct pool_item *next;
+};
+
+struct pool_alloc
+{
+ struct pool_block *head;
+ long nextel;
+ struct pool_item *pool;
+ int sizeofT;
+};
+
+struct pda_run
+{
+ /*
+ * Scanning.
+ */
+ struct fsm_tables *fsm_tables;
+
+ struct run_buf *consume_buf;
+
+ long region, pre_region;
+ long fsm_cs, next_cs, act;
+ alph_t *start;
+ alph_t *tokstart;
+ long tokend;
+ long tokpref;
+ alph_t *p, *pe;
+ char scan_eof;
+
+ char return_result;
+ char skip_tokpref;
+ char eof_term_recvd;
+
+ alph_t *mark[MARK_SLOTS];
+ long matched_token;
+
+ /*
+ * Parsing
+ */
+ int num_retry;
+ parse_tree_t *stack_top;
+ ref_t *token_list;
+ int pda_cs;
+ int next_region_ind;
+
+ struct pda_tables *pda_tables;
+ int parser_id;
+
+ /* Reused. */
+ struct rt_code_vect rcode_collect;
+ struct rt_code_vect reverse_code;
+
+ int stop_parsing;
+ long stop_target;
+
+ parse_tree_t *accum_ignore;
+
+ kid_t *bt_point;
+
+ struct bindings *bindings;
+
+ int revert_on;
+
+ struct colm_struct *context;
+
+ int stop;
+ int parse_error;
+
+ long steps;
+ long target_steps;
+
+ /* The shift count simply tracks the number of shifts that have happend.
+ * The commit shift count is the shift count when the last commit occurred.
+ * If we back up to this number of shifts then we decide we cannot proceed.
+ * The commit shift count is initialized to -1. */
+ long shift_count;
+ long commit_shift_count;
+
+ int on_deck;
+
+ /*
+ * Data we added when refactoring the parsing engine into a coroutine.
+ */
+
+ parse_tree_t *parse_input;
+ struct frame_info *fi;
+ int reduction;
+ parse_tree_t *red_lel;
+ int cur_state;
+ parse_tree_t *lel;
+ int trigger_undo;
+
+ int token_id;
+ head_t *tokdata;
+ int frame_id;
+ int next;
+ parse_tree_t *undo_lel;
+
+ int check_next;
+ int check_stop;
+
+ /* The lhs is sometimes saved before reduction actions in case it is
+ * replaced and we need to restore it on backtracking */
+ tree_t *parsed;
+
+ int reject;
+
+ /* Instruction pointer to use when we stop parsing and execute code. */
+ code_t *code;
+
+ int rc_block_count;
+
+ tree_t *parse_error_text;
+
+ /* Zero indicates parsing proper. Nonzero is the reducer id. */
+ int reducer;
+
+ parse_tree_t *last_final;
+
+ struct pool_alloc *parse_tree_pool;
+ struct pool_alloc local_pool;
+
+ /* Disregard any alternate parse paths, just go right to failure. */
+ int fail_parsing;
+};
+
+void colm_pda_init( struct colm_program *prg, struct pda_run *pda_run,
+ struct pda_tables *tables, int parser_id, long stop_target,
+ int revert_on, struct colm_struct *context, int reducer );
+
+void colm_pda_clear( struct colm_program *prg, struct colm_tree **sp,
+ struct pda_run *pda_run );
+
+void colm_rt_code_vect_replace( struct rt_code_vect *vect, long pos,
+ const code_t *val, long len );
+void colm_rt_code_vect_empty( struct rt_code_vect *vect );
+void colm_rt_code_vect_remove( struct rt_code_vect *vect, long pos, long len );
+
+void init_rt_code_vect( struct rt_code_vect *code_vect );
+
+inline static void append_code_val( struct rt_code_vect *vect, const code_t val );
+inline static void append_code_vect( struct rt_code_vect *vect, const code_t *val, long len );
+inline static void append_half( struct rt_code_vect *vect, half_t half );
+inline static void append_word( struct rt_code_vect *vect, word_t word );
+
+inline static void append_code_vect( struct rt_code_vect *vect, const code_t *val, long len )
+{
+ colm_rt_code_vect_replace( vect, vect->tab_len, val, len );
+}
+
+inline static void append_code_val( struct rt_code_vect *vect, const code_t val )
+{
+ colm_rt_code_vect_replace( vect, vect->tab_len, &val, 1 );
+}
+
+inline static void append_half( struct rt_code_vect *vect, half_t half )
+{
+ /* not optimal. */
+ append_code_val( vect, half & 0xff );
+ append_code_val( vect, (half>>8) & 0xff );
+}
+
+inline static void append_word( struct rt_code_vect *vect, word_t word )
+{
+ /* not optimal. */
+ append_code_val( vect, word & 0xff );
+ append_code_val( vect, (word>>8) & 0xff );
+ append_code_val( vect, (word>>16) & 0xff );
+ append_code_val( vect, (word>>24) & 0xff );
+ #if SIZEOF_LONG == 8
+ append_code_val( vect, (word>>32) & 0xff );
+ append_code_val( vect, (word>>40) & 0xff );
+ append_code_val( vect, (word>>48) & 0xff );
+ append_code_val( vect, (word>>56) & 0xff );
+ #endif
+}
+
+void colm_increment_steps( struct pda_run *pda_run );
+void colm_decrement_steps( struct pda_run *pda_run );
+
+void colm_clear_stream_impl( struct colm_program *prg, tree_t **sp, struct stream_impl *input_stream );
+
+#define PCR_START 1
+#define PCR_DONE 2
+#define PCR_REDUCTION 3
+#define PCR_GENERATION 4
+#define PCR_PRE_EOF 5
+#define PCR_REVERSE 6
+
+head_t *colm_stream_pull( struct colm_program *prg, struct colm_tree **sp,
+ struct pda_run *pda_run, struct input_impl *is, long length );
+head_t *colm_string_alloc_pointer( struct colm_program *prg, const char *data, long length );
+
+kid_t *make_token_with_data( struct colm_program *prg, struct pda_run *pda_run,
+ struct input_impl *input_stream, int id, head_t *tokdata );
+
+long colm_parse_loop( struct colm_program *prg, tree_t **sp, struct pda_run *pda_run,
+ struct input_impl *input_stream, long entry );
+
+long colm_parse_frag( struct colm_program *prg, tree_t **sp,
+ struct pda_run *pda_run, input_t *input, long entry );
+long colm_parse_finish( struct colm_program *prg, tree_t **sp,
+ struct pda_run *pda_run, stream_t *input, long entry );
+long colm_parse_undo_frag( struct colm_program *prg, tree_t **sp, struct pda_run *pda_run,
+ input_t *input, long entry, long steps );
+
+void commit_clear_kid_list( program_t *prg, tree_t **sp, kid_t *kid );
+void commit_clear_parse_tree( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, parse_tree_t *pt );
+void commit_reduce( program_t *prg, tree_t **root,
+ struct pda_run *pda_run );
+
+tree_t *get_parsed_root( struct pda_run *pda_run, int stop );
+
+void colm_parse_reduce_commit( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _COLM_PDRUN_H */
+