diff options
author | Adrian Thurston <thurston@colm.net> | 2019-09-08 21:11:17 -0600 |
---|---|---|
committer | Adrian Thurston <thurston@colm.net> | 2019-09-08 21:11:17 -0600 |
commit | c860c61607117582abd8f23881eed87957197484 (patch) | |
tree | 4d4e65dddc710e15f008189a9308d95924350c3f /src/input.c | |
parent | f37c916aed2600951b8966a86020406b0b0542cf (diff) | |
download | colm-c860c61607117582abd8f23881eed87957197484.tar.gz |
moved the original colm src dir to /colm
Diffstat (limited to 'src/input.c')
-rw-r--r-- | src/input.c | 740 |
1 files changed, 0 insertions, 740 deletions
diff --git a/src/input.c b/src/input.c deleted file mode 100644 index ba4643ee..00000000 --- a/src/input.c +++ /dev/null @@ -1,740 +0,0 @@ -/* - * Copyright 2007-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <colm/input.h> - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <assert.h> -#include <unistd.h> -#include <stdbool.h> - -#include <colm/pdarun.h> -#include <colm/debug.h> -#include <colm/program.h> -#include <colm/tree.h> -#include <colm/bytecode.h> -#include <colm/pool.h> -#include <colm/struct.h> - -DEF_INPUT_FUNCS( input_funcs_seq, input_impl_seq ); -extern struct input_funcs_seq input_funcs; - -static bool is_tree( struct seq_buf *b ) -{ - return b->type == SB_TOKEN || b->type == SB_IGNORE; -} - -static bool is_stream( struct seq_buf *b ) -{ - return b->type == SB_SOURCE || b->type == SB_ACCUM; -} - -char *colm_filename_add( program_t *prg, const char *fn ) -{ - /* Search for it. */ - const char **ptr = prg->stream_fns; - while ( *ptr != 0 ) { - if ( strcmp( *ptr, fn ) == 0 ) - return (char*)*ptr; - ptr += 1; - } - - /* Not present, find. */ - int items = ptr - prg->stream_fns; - - prg->stream_fns = realloc( prg->stream_fns, sizeof(char*) * ( items + 2 ) ); - prg->stream_fns[items] = strdup( fn ); - prg->stream_fns[items+1] = 0; - - return (char*)prg->stream_fns[items]; -} - -static struct seq_buf *new_seq_buf() -{ - struct seq_buf *rb = (struct seq_buf*) malloc( sizeof(struct seq_buf) ); - memset( rb, 0, sizeof(struct seq_buf) ); - return rb; -} - -static void input_transfer_loc( struct colm_program *prg, location_t *loc, struct input_impl_seq *ss ) -{ -} - -static bool call_destructor( struct seq_buf *buf ) -{ - return is_stream( buf ) && buf->own_si; -} - -static void colm_input_destroy( program_t *prg, tree_t **sp, struct_t *s ) -{ - input_t *input = (input_t*) s; - struct input_impl *si = input->impl; - si->funcs->destructor( prg, sp, si ); -} - -static void input_stream_stash_head( struct colm_program *prg, struct input_impl_seq *si, struct seq_buf *seq_buf ) -{ - debug( prg, REALM_INPUT, "stash_head: stream %p buf %p\n", si, seq_buf ); - seq_buf->next = si->stash; - si->stash = seq_buf; -} - -static struct seq_buf *input_stream_pop_stash( struct colm_program *prg, struct input_impl_seq *si ) -{ - struct seq_buf *seq_buf = si->stash; - si->stash = si->stash->next; - - debug( prg, REALM_INPUT, "pop_stash: stream %p buf %p\n", si, seq_buf ); - - return seq_buf; -} - -static void maybe_split( struct colm_program *prg, struct input_impl_seq *iis ) -{ - struct seq_buf *head = iis->queue.head; - if ( head != 0 && is_stream( head ) ) { - /* Maybe the stream will split itself off. */ - struct stream_impl *split_off = head->si->funcs->split_consumed( prg, head->si ); - - if ( split_off != 0 ) { - debug( prg, REALM_INPUT, "maybe split: consumed is > 0, splitting\n" ); - - struct seq_buf *new_buf = new_seq_buf(); - new_buf->type = SB_ACCUM; - new_buf->si = split_off; - new_buf->own_si = 1; - - input_stream_stash_head( prg, iis, new_buf ); - } - } -} - - -/* - * StreamImpl struct, this wraps the list of input streams. - */ - -void init_input_impl_seq( struct input_impl_seq *is, char *name ) -{ - memset( is, 0, sizeof(struct input_impl_seq) ); - - is->type = 'S'; - //is->name = name; - //is->line = 1; - //is->column = 1; - //is->byte = 0; -} - -static struct seq_buf *input_stream_seq_pop_head( struct input_impl_seq *is ) -{ - struct seq_buf *ret = is->queue.head; - is->queue.head = is->queue.head->next; - if ( is->queue.head == 0 ) - is->queue.tail = 0; - else - is->queue.head->prev = 0; - return ret; -} - -static void input_stream_seq_append( struct input_impl_seq *is, struct seq_buf *seq_buf ) -{ - if ( is->queue.head == 0 ) { - seq_buf->prev = seq_buf->next = 0; - is->queue.head = is->queue.tail = seq_buf; - } - else { - is->queue.tail->next = seq_buf; - seq_buf->prev = is->queue.tail; - seq_buf->next = 0; - is->queue.tail = seq_buf; - } -} - -static struct seq_buf *input_stream_seq_pop_tail( struct input_impl_seq *is ) -{ - struct seq_buf *ret = is->queue.tail; - is->queue.tail = is->queue.tail->prev; - if ( is->queue.tail == 0 ) - is->queue.head = 0; - else - is->queue.tail->next = 0; - return ret; -} - -static void input_stream_seq_prepend( struct input_impl_seq *is, struct seq_buf *seq_buf ) -{ - if ( is->queue.head == 0 ) { - seq_buf->prev = seq_buf->next = 0; - is->queue.head = is->queue.tail = seq_buf; - } - else { - is->queue.head->prev = seq_buf; - seq_buf->prev = 0; - seq_buf->next = is->queue.head; - is->queue.head = seq_buf; - } -} - -void input_set_eof_mark( struct colm_program *prg, struct input_impl_seq *si, char eof_mark ) -{ - si->eof_mark = eof_mark; -} - -static void input_destructor( program_t *prg, tree_t **sp, struct input_impl_seq *si ) -{ - struct seq_buf *buf = si->queue.head; - while ( buf != 0 ) { - if ( is_tree( buf ) ) - colm_tree_downref( prg, sp, buf->tree ); - - if ( call_destructor( buf ) ) - buf->si->funcs->destructor( prg, sp, buf->si ); - - struct seq_buf *next = buf->next; - free( buf ); - buf = next; - } - - buf = si->stash; - while ( buf != 0 ) { - struct seq_buf *next = buf->next; - if ( call_destructor( buf ) ) - buf->si->funcs->destructor( prg, sp, buf->si ); - - free( buf ); - buf = next; - } - - si->queue.head = 0; - - /* FIXME: Need to leak this for now. Until we can return strings to a - * program loader and free them at a later date (after the colm program is - * deleted). */ - // if ( stream->impl->name != 0 ) - // free( stream->impl->name ); - - free( si ); -} - -static int input_get_option( struct colm_program *prg, struct input_impl_seq *ii, int option ) -{ - return ii->auto_trim; -} - -static void input_set_option( struct colm_program *prg, struct input_impl_seq *ii, int option, int value ) -{ - ii->auto_trim = value ? 1 : 0; -} - - -static int input_get_parse_block( struct colm_program *prg, struct input_impl_seq *is, int *pskip, char **pdp, int *copied ) -{ - int ret = 0; - *copied = 0; - - /* Move over skip bytes. */ - struct seq_buf *buf = is->queue.head; - while ( true ) { - if ( buf == 0 ) { - /* Got through the in-mem buffers without copying anything. */ - ret = is->eof_mark ? INPUT_EOF : INPUT_EOD; - break; - } - - if ( is_stream( buf ) ) { - struct stream_impl *si = buf->si; - int type = si->funcs->get_parse_block( prg, si, pskip, pdp, copied ); - - if ( type == INPUT_EOD || type == INPUT_EOF ) { - buf = buf->next; - continue; - } - - ret = type; - break; - } - - if ( buf->type == SB_TOKEN ) { - ret = INPUT_TREE; - break; - } - - if ( buf->type == SB_IGNORE ) { - ret = INPUT_IGNORE; - break; - } - - buf = buf->next; - } - -#if DEBUG - switch ( ret ) { - case INPUT_DATA: - if ( *pdp != 0 ) { - debug( prg, REALM_INPUT, "get parse block: DATA: %d %.*s\n", *copied, (int)(*copied), *pdp ); - } - else { - debug( prg, REALM_INPUT, "get parse block: DATA: %d\n", *copied ); - } - break; - case INPUT_EOD: - debug( prg, REALM_INPUT, "get parse block: EOD\n" ); - break; - case INPUT_EOF: - debug( prg, REALM_INPUT, "get parse block: EOF\n" ); - break; - case INPUT_TREE: - debug( prg, REALM_INPUT, "get parse block: TREE\n" ); - break; - case INPUT_IGNORE: - debug( prg, REALM_INPUT, "get parse block: IGNORE\n" ); - break; - case INPUT_LANG_EL: - debug( prg, REALM_INPUT, "get parse block: LANG_EL\n" ); - break; - } -#endif - - return ret; -} - -static int input_get_data( struct colm_program *prg, struct input_impl_seq *is, char *dest, int length ) -{ - int copied = 0; - - /* Move over skip bytes. */ - struct seq_buf *buf = is->queue.head; - while ( true ) { - if ( buf == 0 ) { - /* Got through the in-mem buffers without copying anything. */ - break; - } - - if ( is_stream( buf ) ) { - struct stream_impl *si = buf->si; - int glen = si->funcs->get_data( prg, si, dest+copied, length ); - - if ( glen == 0 ) { - //debug( REALM_INPUT, "skipping over input\n" ); - buf = buf->next; - continue; - } - - copied += glen; - length -= glen; - } - else if ( buf->type == SB_TOKEN ) - break; - else if ( buf->type == SB_IGNORE ) - break; - - if ( length == 0 ) { - //debug( REALM_INPUT, "exiting get data\n", length ); - break; - } - - buf = buf->next; - } - - return copied; -} - -/* - * Consume - */ - -static int input_consume_data( struct colm_program *prg, struct input_impl_seq *si, int length, location_t *loc ) -{ - debug( prg, REALM_INPUT, "input_consume_data: stream %p consuming %d bytes\n", si, length ); - - int consumed = 0; - - /* Move over skip bytes. */ - while ( true ) { - struct seq_buf *buf = si->queue.head; - - if ( buf == 0 ) - break; - - if ( is_stream( buf ) ) { - struct stream_impl *sub = buf->si; - int slen = sub->funcs->consume_data( prg, sub, length, loc ); - //debug( REALM_INPUT, " got %d bytes from source\n", slen ); - - consumed += slen; - length -= slen; - } - else if ( buf->type == SB_TOKEN ) - break; - else if ( buf->type == SB_IGNORE ) - break; - else { - assert(false); - } - - if ( length == 0 ) { - //debug( REALM_INPUT, "exiting consume\n", length ); - break; - } - - struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); - input_stream_stash_head( prg, si, seq_buf ); - } - - return consumed; -} - -static int input_undo_consume_data( struct colm_program *prg, struct input_impl_seq *si, const char *data, int length ) -{ - /* When we push back data we need to move backwards through the block of - * text. The source stream type will */ - debug( prg, REALM_INPUT, "input_undo_consume_data: stream %p undoing consume of %d bytes\n", si, length ); - - assert( length > 0 ); - long tot = length; - int offset = 0; - int remaining = length; - - while ( true ) { - if ( is_stream( si->queue.head ) ) { - struct stream_impl *sub = si->queue.head->si; - int pushed_back = sub->funcs->undo_consume_data( prg, sub, data, remaining ); - remaining -= pushed_back; - offset += pushed_back; - - if ( remaining == 0 ) - break; - } - - struct seq_buf *b = input_stream_pop_stash( prg, si ); - input_stream_seq_prepend( si, b ); - } - - return tot; -} - -static tree_t *input_consume_tree( struct colm_program *prg, struct input_impl_seq *si ) -{ - debug( prg, REALM_INPUT, "input_consume_tree: stream %p\n", si ); - - while ( si->queue.head != 0 && is_stream( si->queue.head ) ) - { - debug( prg, REALM_INPUT, " stream %p consume: clearing source type\n", si ); - struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); - input_stream_stash_head( prg, si, seq_buf ); - } - - assert( si->queue.head != 0 && ( si->queue.head->type == SB_TOKEN || si->queue.head->type == SB_IGNORE ) ); - - { - struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); - input_stream_stash_head( prg, si, seq_buf ); - tree_t *tree = seq_buf->tree; - debug( prg, REALM_INPUT, " stream %p consume: tree: %p\n", si, tree ); - return tree; - } - - return 0; -} - - -static void input_undo_consume_tree( struct colm_program *prg, struct input_impl_seq *si, tree_t *tree, int ignore ) -{ - debug( prg, REALM_INPUT, "input_undo_consume_tree: stream %p undo consume tree %p\n", si, tree ); - - while ( true ) { - debug( prg, REALM_INPUT, " stream %p consume: clearing source type\n", si ); - - struct seq_buf *b = input_stream_pop_stash( prg, si ); - input_stream_seq_prepend( si, b ); - - if ( is_tree( b ) ) { - assert( b->tree->id == tree->id ); - break; - } - } -} - -/* - * Prepend - */ -static void input_prepend_data( struct colm_program *prg, struct input_impl_seq *si, const char *data, long length ) -{ - debug( prg, REALM_INPUT, "input_prepend_data: stream %p prepend data length %d\n", si, length ); - - maybe_split( prg, si ); - - struct stream_impl *sub_si = colm_impl_new_text( "<text1>", data, length ); - - struct seq_buf *new_buf = new_seq_buf(); - new_buf->type = SB_ACCUM; - new_buf->si = sub_si; - new_buf->own_si = 1; - - input_stream_seq_prepend( si, new_buf ); -} - -static int input_undo_prepend_data( struct colm_program *prg, struct input_impl_seq *si, int length ) -{ - debug( prg, REALM_INPUT, "input_undo_prepend_data: stream %p undo append data length %d\n", si, length ); - - struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); - free( seq_buf ); - - return 0; -} - -static void input_prepend_tree( struct colm_program *prg, struct input_impl_seq *si, tree_t *tree, int ignore ) -{ - debug( prg, REALM_INPUT, "input_prepend_tree: stream %p prepend tree %p\n", si, tree ); - - maybe_split( prg, si ); - - /* Create a new buffer for the data. This is the easy implementation. - * Something better is needed here. It puts a max on the amount of - * data that can be pushed back to the inputStream. */ - struct seq_buf *new_buf = new_seq_buf(); - new_buf->type = ignore ? SB_IGNORE : SB_TOKEN; - new_buf->tree = tree; - input_stream_seq_prepend( si, new_buf ); -} - -static tree_t *input_undo_prepend_tree( struct colm_program *prg, struct input_impl_seq *si ) -{ - debug( prg, REALM_INPUT, "input_undo_prepend_tree: stream %p undo prepend tree\n", si ); - - assert( si->queue.head != 0 && ( si->queue.head->type == SB_TOKEN || - si->queue.head->type == SB_IGNORE ) ); - - struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); - - tree_t *tree = seq_buf->tree; - free(seq_buf); - - debug( prg, REALM_INPUT, " stream %p tree %p\n", si, tree ); - - return tree; -} - - -static void input_prepend_stream( struct colm_program *prg, struct input_impl_seq *si, struct colm_stream *stream ) -{ - maybe_split( prg, si ); - - /* Create a new buffer for the data. This is the easy implementation. - * Something better is needed here. It puts a max on the amount of - * data that can be pushed back to the inputStream. */ - struct seq_buf *new_buf = new_seq_buf(); - new_buf->type = SB_SOURCE; - new_buf->si = stream_to_impl( stream ); - input_stream_seq_prepend( si, new_buf ); - - assert( ((struct stream_impl_data*)new_buf->si)->type == 'D' ); -} - -static tree_t *input_undo_prepend_stream( struct colm_program *prg, struct input_impl_seq *is ) -{ - struct seq_buf *seq_buf = input_stream_seq_pop_head( is ); - free( seq_buf ); - return 0; -} - -static void input_append_data( struct colm_program *prg, struct input_impl_seq *si, const char *data, long length ) -{ - debug( prg, REALM_INPUT, "input_append_data: stream %p append data length %d\n", si, length ); - - if ( si->queue.tail == 0 || si->queue.tail->type != SB_ACCUM ) { - debug( prg, REALM_INPUT, "input_append_data: creating accum\n" ); - - struct stream_impl *sub_si = colm_impl_new_accum( "<text2>" ); - - struct seq_buf *new_buf = new_seq_buf(); - new_buf->type = SB_ACCUM; - new_buf->si = sub_si; - new_buf->own_si = 1; - - input_stream_seq_append( si, new_buf ); - } - - si->queue.tail->si->funcs->append_data( prg, si->queue.tail->si, data, length ); -} - -static tree_t *input_undo_append_data( struct colm_program *prg, struct input_impl_seq *si, int length ) -{ - debug( prg, REALM_INPUT, "input_undo_append_data: stream %p undo append data length %d\n", si, length ); - - while ( true ) { - struct seq_buf *buf = si->queue.tail; - - if ( buf == 0 ) - break; - - if ( is_stream( buf ) ) { - struct stream_impl *sub = buf->si; - int slen = sub->funcs->undo_append_data( prg, sub, length ); - //debug( REALM_INPUT, " got %d bytes from source\n", slen ); - //consumed += slen; - length -= slen; - } - else if ( buf->type == SB_TOKEN ) - break; - else if ( buf->type == SB_IGNORE ) - break; - else { - assert(false); - } - - if ( length == 0 ) { - //debug( REALM_INPUT, "exiting consume\n", length ); - break; - } - - struct seq_buf *seq_buf = input_stream_seq_pop_tail( si ); - free( seq_buf ); - } - return 0; -} - -static void input_append_tree( struct colm_program *prg, struct input_impl_seq *si, tree_t *tree ) -{ - debug( prg, REALM_INPUT, "input_append_tree: stream %p append tree %p\n", si, tree ); - - struct seq_buf *ad = new_seq_buf(); - - input_stream_seq_append( si, ad ); - - ad->type = SB_TOKEN; - ad->tree = tree; -} - -static tree_t *input_undo_append_tree( struct colm_program *prg, struct input_impl_seq *si ) -{ - debug( prg, REALM_INPUT, "input_undo_append_tree: stream %p undo append tree\n", si ); - - struct seq_buf *seq_buf = input_stream_seq_pop_tail( si ); - tree_t *tree = seq_buf->tree; - free( seq_buf ); - return tree; -} - -static void input_append_stream( struct colm_program *prg, struct input_impl_seq *si, struct colm_stream *stream ) -{ - debug( prg, REALM_INPUT, "input_append_stream: stream %p append stream %p\n", si, stream ); - - struct seq_buf *ad = new_seq_buf(); - - input_stream_seq_append( si, ad ); - - ad->type = SB_SOURCE; - ad->si = stream_to_impl( stream ); - - assert( ((struct stream_impl_data*)ad->si)->type == 'D' ); -} - -static tree_t *input_undo_append_stream( struct colm_program *prg, struct input_impl_seq *si ) -{ - debug( prg, REALM_INPUT, "input_undo_append_stream: stream %p undo append stream\n", si ); - - struct seq_buf *seq_buf = input_stream_seq_pop_tail( si ); - free( seq_buf ); - return 0; -} - -struct input_funcs_seq input_funcs = -{ - &input_get_parse_block, - &input_get_data, - - /* Consume. */ - &input_consume_data, - &input_undo_consume_data, - - &input_consume_tree, - &input_undo_consume_tree, - - 0, /* consume_lang_el */ - 0, /* undo_consume_lang_el */ - - /* Prepend */ - &input_prepend_data, - &input_undo_prepend_data, - - &input_prepend_tree, - &input_undo_prepend_tree, - - &input_prepend_stream, - &input_undo_prepend_stream, - - /* Append */ - &input_append_data, - &input_undo_append_data, - - &input_append_tree, - &input_undo_append_tree, - - &input_append_stream, - &input_undo_append_stream, - - /* EOF */ - &input_set_eof_mark, - - &input_transfer_loc, - &input_destructor, - - /* Trimming */ - &input_get_option, - &input_set_option, -}; - -struct input_impl *colm_impl_new_generic( char *name ) -{ - struct input_impl_seq *ss = (struct input_impl_seq*)malloc(sizeof(struct input_impl_seq)); - init_input_impl_seq( ss, name ); - ss->funcs = (struct input_funcs*)&input_funcs; - return (struct input_impl*)ss; -} - -input_t *colm_input_new_struct( program_t *prg ) -{ - size_t memsize = sizeof(struct colm_input); - struct colm_input *input = (struct colm_input*) malloc( memsize ); - memset( input, 0, memsize ); - colm_struct_add( prg, (struct colm_struct *)input ); - input->id = prg->rtd->struct_input_id; - input->destructor = &colm_input_destroy; - return input; -} - -input_t *colm_input_new( program_t *prg ) -{ - struct input_impl *impl = colm_impl_new_generic( colm_filename_add( prg, "<internal>" ) ); - struct colm_input *input = colm_input_new_struct( prg ); - input->impl = impl; - return input; -} - -struct input_impl *input_to_impl( input_t *ptr ) -{ - return ptr->impl; -} |