diff options
author | Adrian Thurston <thurston@colm.net> | 2020-03-14 15:29:52 +0200 |
---|---|---|
committer | Adrian Thurston <thurston@colm.net> | 2020-03-14 15:29:52 +0200 |
commit | f653735830d537715f2885bd832cf04851d35401 (patch) | |
tree | 95e6551e39407543366d4f49aedf7b78c6e8bbe1 /src/pdarun.h | |
parent | bcc54d5df10cf425e7134b06f70d7ffe1abee4e4 (diff) | |
download | colm-f653735830d537715f2885bd832cf04851d35401.tar.gz |
moved source files into commit repository
Diffstat (limited to 'src/pdarun.h')
-rw-r--r-- | src/pdarun.h | 471 |
1 files changed, 471 insertions, 0 deletions
diff --git a/src/pdarun.h b/src/pdarun.h new file mode 100644 index 00000000..4003b9be --- /dev/null +++ b/src/pdarun.h @@ -0,0 +1,471 @@ +/* + * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _COLM_PDARUN_H +#define _COLM_PDARUN_H + +#include <colm/input.h> +#include <colm/defs.h> +#include <colm/tree.h> +#include <colm/struct.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct colm_program; + +#define MARK_SLOTS 32 + +struct fsm_tables +{ + long *actions; + long *key_offsets; + char *trans_keys; + long *single_lengths; + long *range_lengths; + long *index_offsets; + long *transTargsWI; + long *transActionsWI; + long *to_state_actions; + long *from_state_actions; + long *eof_actions; + long *eof_targs; + long *entry_by_region; + + long num_states; + long num_actions; + long num_trans_keys; + long num_single_lengths; + long num_range_lengths; + long num_index_offsets; + long numTransTargsWI; + long numTransActionsWI; + long num_regions; + + long start_state; + long first_final; + long error_state; + + struct GenAction **action_switch; + long num_action_switch; +}; + +#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 + #error "SIZEOF_LONG contained an unexpected value" +#endif + +struct colm_execution; + +struct rt_code_vect +{ + code_t *data; + long tab_len; + long alloc_len; + + /* FIXME: leak when freed. */ +}; + +void list_add_after( list_t *list, list_el_t *prev_el, list_el_t *new_el ); +void list_add_before( list_t *list, list_el_t *next_el, list_el_t *new_el ); + +void list_prepend( list_t *list, list_el_t *new_el ); +void list_append( list_t *list, list_el_t *new_el ); + +list_el_t *list_detach( list_t *list, list_el_t *el ); +list_el_t *list_detach_first(list_t *list ); +list_el_t *list_detach_last(list_t *list ); + +long list_length(list_t *list); + +struct function_info +{ + long frame_id; + long arg_size; + long frame_size; +}; + +/* + * Program Data. + */ + +struct pat_cons_info +{ + long offset; + long num_bindings; +}; + +struct pat_cons_node +{ + long id; + long prod_num; + long next; + long child; + long bind_id; + const char *data; + long length; + long left_ignore; + long right_ignore; + + /* Just match nonterminal, don't go inside. */ + unsigned char stop; +}; + +/* FIXME: should have a descriptor for object types to give the length. */ + +struct lang_el_info +{ + const char *name; + const char *xml_tag; + unsigned char repeat; + unsigned char list; + unsigned char literal; + unsigned char ignore; + + long frame_id; + + long object_type_id; + long ofi_offset; + long object_length; + + long term_dup_id; + long mark_id; + long capture_attr; + long num_capture_attr; +}; + +struct struct_el_info +{ + long size; + short *trees; + long trees_len; +}; + +struct prod_info +{ + unsigned long lhs_id; + short prod_num; + long length; + const char *name; + long frame_id; + unsigned char lhs_upref; + unsigned char *copy; + long copy_len; +}; + +/* Must match the LocalType enum. */ +#define LI_Tree 1 +#define LI_Iter 2 +#define LI_RevIter 3 +#define LI_UserIter 4 + +struct local_info +{ + char type; + short offset; +}; + +struct frame_info +{ + const char *name; + code_t *codeWV; + long codeLenWV; + code_t *codeWC; + long codeLenWC; + struct local_info *locals; + long locals_len; + long arg_size; + long frame_size; + char ret_tree; +}; + +struct region_info +{ + long default_token; + long eof_frame_id; + int ci_lel_id; +}; + +typedef struct _CaptureAttr +{ + long mark_enter; + long mark_leave; + long offset; +} CaptureAttr; + +struct pda_tables +{ + /* Parser table data. */ + int *indices; + int *owners; + int *keys; + unsigned int *offsets; + unsigned int *targs; + unsigned int *act_inds; + unsigned int *actions; + int *commit_len; + int *token_region_inds; + int *token_regions; + int *token_pre_regions; + + int num_indices; + int num_keys; + int num_states; + int num_targs; + int num_act_inds; + int num_actions; + int num_commit_len; + int num_region_items; + int num_pre_region_items; +}; + +struct pool_block +{ + void *data; + struct pool_block *next; +}; + +struct pool_item +{ + struct pool_item *next; +}; + +struct pool_alloc +{ + struct pool_block *head; + long nextel; + struct pool_item *pool; + int sizeofT; +}; + +struct pda_run +{ + /* + * Scanning. + */ + struct fsm_tables *fsm_tables; + + struct run_buf *consume_buf; + + long region, pre_region; + long fsm_cs, next_cs, act; + alph_t *start; + alph_t *tokstart; + long tokend; + long tokpref; + alph_t *p, *pe; + char scan_eof; + + char return_result; + char skip_tokpref; + char eof_term_recvd; + + alph_t *mark[MARK_SLOTS]; + long matched_token; + + /* + * Parsing + */ + int num_retry; + parse_tree_t *stack_top; + ref_t *token_list; + int pda_cs; + int next_region_ind; + + struct pda_tables *pda_tables; + int parser_id; + + /* Reused. */ + struct rt_code_vect rcode_collect; + struct rt_code_vect reverse_code; + + int stop_parsing; + long stop_target; + + parse_tree_t *accum_ignore; + + kid_t *bt_point; + + struct bindings *bindings; + + int revert_on; + + struct colm_struct *context; + + int stop; + int parse_error; + + long steps; + long target_steps; + + /* The shift count simply tracks the number of shifts that have happend. + * The commit shift count is the shift count when the last commit occurred. + * If we back up to this number of shifts then we decide we cannot proceed. + * The commit shift count is initialized to -1. */ + long shift_count; + long commit_shift_count; + + int on_deck; + + /* + * Data we added when refactoring the parsing engine into a coroutine. + */ + + parse_tree_t *parse_input; + struct frame_info *fi; + int reduction; + parse_tree_t *red_lel; + int cur_state; + parse_tree_t *lel; + int trigger_undo; + + int token_id; + head_t *tokdata; + int frame_id; + int next; + parse_tree_t *undo_lel; + + int check_next; + int check_stop; + + /* The lhs is sometimes saved before reduction actions in case it is + * replaced and we need to restore it on backtracking */ + tree_t *parsed; + + int reject; + + /* Instruction pointer to use when we stop parsing and execute code. */ + code_t *code; + + int rc_block_count; + + tree_t *parse_error_text; + + /* Zero indicates parsing proper. Nonzero is the reducer id. */ + int reducer; + + parse_tree_t *last_final; + + struct pool_alloc *parse_tree_pool; + struct pool_alloc local_pool; + + /* Disregard any alternate parse paths, just go right to failure. */ + int fail_parsing; +}; + +void colm_pda_init( struct colm_program *prg, struct pda_run *pda_run, + struct pda_tables *tables, int parser_id, long stop_target, + int revert_on, struct colm_struct *context, int reducer ); + +void colm_pda_clear( struct colm_program *prg, struct colm_tree **sp, + struct pda_run *pda_run ); + +void colm_rt_code_vect_replace( struct rt_code_vect *vect, long pos, + const code_t *val, long len ); +void colm_rt_code_vect_empty( struct rt_code_vect *vect ); +void colm_rt_code_vect_remove( struct rt_code_vect *vect, long pos, long len ); + +void init_rt_code_vect( struct rt_code_vect *code_vect ); + +inline static void append_code_val( struct rt_code_vect *vect, const code_t val ); +inline static void append_code_vect( struct rt_code_vect *vect, const code_t *val, long len ); +inline static void append_half( struct rt_code_vect *vect, half_t half ); +inline static void append_word( struct rt_code_vect *vect, word_t word ); + +inline static void append_code_vect( struct rt_code_vect *vect, const code_t *val, long len ) +{ + colm_rt_code_vect_replace( vect, vect->tab_len, val, len ); +} + +inline static void append_code_val( struct rt_code_vect *vect, const code_t val ) +{ + colm_rt_code_vect_replace( vect, vect->tab_len, &val, 1 ); +} + +inline static void append_half( struct rt_code_vect *vect, half_t half ) +{ + /* not optimal. */ + append_code_val( vect, half & 0xff ); + append_code_val( vect, (half>>8) & 0xff ); +} + +inline static void append_word( struct rt_code_vect *vect, word_t word ) +{ + /* not optimal. */ + append_code_val( vect, word & 0xff ); + append_code_val( vect, (word>>8) & 0xff ); + append_code_val( vect, (word>>16) & 0xff ); + append_code_val( vect, (word>>24) & 0xff ); + #if SIZEOF_LONG == 8 + append_code_val( vect, (word>>32) & 0xff ); + append_code_val( vect, (word>>40) & 0xff ); + append_code_val( vect, (word>>48) & 0xff ); + append_code_val( vect, (word>>56) & 0xff ); + #endif +} + +void colm_increment_steps( struct pda_run *pda_run ); +void colm_decrement_steps( struct pda_run *pda_run ); + +void colm_clear_stream_impl( struct colm_program *prg, tree_t **sp, struct stream_impl *input_stream ); + +#define PCR_START 1 +#define PCR_DONE 2 +#define PCR_REDUCTION 3 +#define PCR_GENERATION 4 +#define PCR_PRE_EOF 5 +#define PCR_REVERSE 6 + +head_t *colm_stream_pull( struct colm_program *prg, struct colm_tree **sp, + struct pda_run *pda_run, struct input_impl *is, long length ); +head_t *colm_string_alloc_pointer( struct colm_program *prg, const char *data, long length ); + +kid_t *make_token_with_data( struct colm_program *prg, struct pda_run *pda_run, + struct input_impl *input_stream, int id, head_t *tokdata ); + +long colm_parse_loop( struct colm_program *prg, tree_t **sp, struct pda_run *pda_run, + struct input_impl *input_stream, long entry ); + +long colm_parse_frag( struct colm_program *prg, tree_t **sp, + struct pda_run *pda_run, input_t *input, long entry ); +long colm_parse_finish( struct colm_program *prg, tree_t **sp, + struct pda_run *pda_run, stream_t *input, long entry ); +long colm_parse_undo_frag( struct colm_program *prg, tree_t **sp, struct pda_run *pda_run, + input_t *input, long entry, long steps ); + +void commit_clear_kid_list( program_t *prg, tree_t **sp, kid_t *kid ); +void commit_clear_parse_tree( program_t *prg, tree_t **sp, + struct pda_run *pda_run, parse_tree_t *pt ); +void commit_reduce( program_t *prg, tree_t **root, + struct pda_run *pda_run ); + +tree_t *get_parsed_root( struct pda_run *pda_run, int stop ); + +void colm_parse_reduce_commit( program_t *prg, tree_t **sp, + struct pda_run *pda_run ); + +#ifdef __cplusplus +} +#endif + +#endif /* _COLM_PDRUN_H */ + |