From f653735830d537715f2885bd832cf04851d35401 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 14 Mar 2020 15:29:52 +0200 Subject: moved source files into commit repository --- src/input.c | 759 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 759 insertions(+) create mode 100644 src/input.c (limited to 'src/input.c') diff --git a/src/input.c b/src/input.c new file mode 100644 index 00000000..043791f2 --- /dev/null +++ b/src/input.c @@ -0,0 +1,759 @@ +/* + * Copyright 2007-2018 Adrian Thurston + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +DEF_INPUT_FUNCS( input_funcs_seq, input_impl_seq ); +extern struct input_funcs_seq input_funcs; + +static bool is_tree( struct seq_buf *b ) +{ + return b->type == SB_TOKEN || b->type == SB_IGNORE; +} + +static bool is_stream( struct seq_buf *b ) +{ + return b->type == SB_SOURCE || b->type == SB_ACCUM; +} + +char *colm_filename_add( program_t *prg, const char *fn ) +{ + /* Search for it. */ + const char **ptr = prg->stream_fns; + while ( *ptr != 0 ) { + if ( strcmp( *ptr, fn ) == 0 ) + return (char*)*ptr; + ptr += 1; + } + + /* Not present, find. */ + int items = ptr - prg->stream_fns; + + prg->stream_fns = realloc( prg->stream_fns, sizeof(char*) * ( items + 2 ) ); + prg->stream_fns[items] = strdup( fn ); + prg->stream_fns[items+1] = 0; + + return (char*)prg->stream_fns[items]; +} + +static struct seq_buf *new_seq_buf() +{ + struct seq_buf *rb = (struct seq_buf*) malloc( sizeof(struct seq_buf) ); + memset( rb, 0, sizeof(struct seq_buf) ); + return rb; +} + +static void input_transfer_loc( struct colm_program *prg, location_t *loc, + struct input_impl_seq *ss ) +{ +} + +static bool call_destructor( struct seq_buf *buf ) +{ + return is_stream( buf ) && buf->own_si; +} + +static void colm_input_destroy( program_t *prg, tree_t **sp, struct_t *s ) +{ + input_t *input = (input_t*) s; + struct input_impl *si = input->impl; + si->funcs->destructor( prg, sp, si ); +} + +static void input_stream_stash_head( struct colm_program *prg, + struct input_impl_seq *si, struct seq_buf *seq_buf ) +{ + debug( prg, REALM_INPUT, "stash_head: stream %p buf %p\n", si, seq_buf ); + seq_buf->next = si->stash; + si->stash = seq_buf; +} + +static struct seq_buf *input_stream_pop_stash( struct colm_program *prg, struct input_impl_seq *si ) +{ + struct seq_buf *seq_buf = si->stash; + si->stash = si->stash->next; + + debug( prg, REALM_INPUT, "pop_stash: stream %p buf %p\n", si, seq_buf ); + + return seq_buf; +} + +static void maybe_split( struct colm_program *prg, struct input_impl_seq *iis ) +{ + struct seq_buf *head = iis->queue.head; + if ( head != 0 && is_stream( head ) ) { + /* Maybe the stream will split itself off. */ + struct stream_impl *split_off = head->si->funcs->split_consumed( prg, head->si ); + + if ( split_off != 0 ) { + debug( prg, REALM_INPUT, "maybe split: consumed is > 0, splitting\n" ); + + struct seq_buf *new_buf = new_seq_buf(); + new_buf->type = SB_ACCUM; + new_buf->si = split_off; + new_buf->own_si = 1; + + input_stream_stash_head( prg, iis, new_buf ); + } + } +} + + +/* + * StreamImpl struct, this wraps the list of input streams. + */ + +void init_input_impl_seq( struct input_impl_seq *is, char *name ) +{ + memset( is, 0, sizeof(struct input_impl_seq) ); + + is->type = 'S'; + //is->name = name; + //is->line = 1; + //is->column = 1; + //is->byte = 0; +} + +static struct seq_buf *input_stream_seq_pop_head( struct input_impl_seq *is ) +{ + struct seq_buf *ret = is->queue.head; + is->queue.head = is->queue.head->next; + if ( is->queue.head == 0 ) + is->queue.tail = 0; + else + is->queue.head->prev = 0; + return ret; +} + +static void input_stream_seq_append( struct input_impl_seq *is, struct seq_buf *seq_buf ) +{ + if ( is->queue.head == 0 ) { + seq_buf->prev = seq_buf->next = 0; + is->queue.head = is->queue.tail = seq_buf; + } + else { + is->queue.tail->next = seq_buf; + seq_buf->prev = is->queue.tail; + seq_buf->next = 0; + is->queue.tail = seq_buf; + } +} + +static struct seq_buf *input_stream_seq_pop_tail( struct input_impl_seq *is ) +{ + struct seq_buf *ret = is->queue.tail; + is->queue.tail = is->queue.tail->prev; + if ( is->queue.tail == 0 ) + is->queue.head = 0; + else + is->queue.tail->next = 0; + return ret; +} + +static void input_stream_seq_prepend( struct input_impl_seq *is, struct seq_buf *seq_buf ) +{ + if ( is->queue.head == 0 ) { + seq_buf->prev = seq_buf->next = 0; + is->queue.head = is->queue.tail = seq_buf; + } + else { + is->queue.head->prev = seq_buf; + seq_buf->prev = 0; + seq_buf->next = is->queue.head; + is->queue.head = seq_buf; + } +} + +void input_set_eof_mark( struct colm_program *prg, struct input_impl_seq *si, char eof_mark ) +{ + si->eof_mark = eof_mark; +} + +static void input_destructor( program_t *prg, tree_t **sp, struct input_impl_seq *si ) +{ + struct seq_buf *buf = si->queue.head; + while ( buf != 0 ) { + if ( is_tree( buf ) ) + colm_tree_downref( prg, sp, buf->tree ); + + if ( call_destructor( buf ) ) + buf->si->funcs->destructor( prg, sp, buf->si ); + + struct seq_buf *next = buf->next; + free( buf ); + buf = next; + } + + buf = si->stash; + while ( buf != 0 ) { + struct seq_buf *next = buf->next; + if ( call_destructor( buf ) ) + buf->si->funcs->destructor( prg, sp, buf->si ); + + free( buf ); + buf = next; + } + + si->queue.head = 0; + + /* FIXME: Need to leak this for now. Until we can return strings to a + * program loader and free them at a later date (after the colm program is + * deleted). */ + // if ( stream->impl->name != 0 ) + // free( stream->impl->name ); + + free( si ); +} + +static int input_get_option( struct colm_program *prg, struct input_impl_seq *ii, + int option ) +{ + return ii->auto_trim; +} + +static void input_set_option( struct colm_program *prg, struct input_impl_seq *ii, + int option, int value ) +{ + ii->auto_trim = value ? 1 : 0; +} + + +static int input_get_parse_block( struct colm_program *prg, struct input_impl_seq *is, + int *pskip, alph_t **pdp, int *copied ) +{ + int ret = 0; + *copied = 0; + + /* Move over skip bytes. */ + struct seq_buf *buf = is->queue.head; + while ( true ) { + if ( buf == 0 ) { + /* Got through the in-mem buffers without copying anything. */ + ret = is->eof_mark ? INPUT_EOF : INPUT_EOD; + break; + } + + if ( is_stream( buf ) ) { + struct stream_impl *si = buf->si; + int type = si->funcs->get_parse_block( prg, si, pskip, pdp, copied ); + + if ( type == INPUT_EOD || type == INPUT_EOF ) { + buf = buf->next; + continue; + } + + ret = type; + break; + } + + if ( buf->type == SB_TOKEN ) { + ret = INPUT_TREE; + break; + } + + if ( buf->type == SB_IGNORE ) { + ret = INPUT_IGNORE; + break; + } + + buf = buf->next; + } + +#if DEBUG + switch ( ret ) { + case INPUT_DATA: + if ( *pdp != 0 ) { + debug( prg, REALM_INPUT, "get parse block: DATA: %d %.*s\n", + *copied, (int)(*copied), *pdp ); + } + else { + debug( prg, REALM_INPUT, "get parse block: DATA: %d\n", *copied ); + } + break; + case INPUT_EOD: + debug( prg, REALM_INPUT, "get parse block: EOD\n" ); + break; + case INPUT_EOF: + debug( prg, REALM_INPUT, "get parse block: EOF\n" ); + break; + case INPUT_TREE: + debug( prg, REALM_INPUT, "get parse block: TREE\n" ); + break; + case INPUT_IGNORE: + debug( prg, REALM_INPUT, "get parse block: IGNORE\n" ); + break; + case INPUT_LANG_EL: + debug( prg, REALM_INPUT, "get parse block: LANG_EL\n" ); + break; + } +#endif + + return ret; +} + +static int input_get_data( struct colm_program *prg, struct input_impl_seq *is, + alph_t *dest, int length ) +{ + int copied = 0; + + /* Move over skip bytes. */ + struct seq_buf *buf = is->queue.head; + while ( true ) { + if ( buf == 0 ) { + /* Got through the in-mem buffers without copying anything. */ + break; + } + + if ( is_stream( buf ) ) { + struct stream_impl *si = buf->si; + int glen = si->funcs->get_data( prg, si, dest+copied, length ); + + if ( glen == 0 ) { + //debug( REALM_INPUT, "skipping over input\n" ); + buf = buf->next; + continue; + } + + copied += glen; + length -= glen; + } + else if ( buf->type == SB_TOKEN ) + break; + else if ( buf->type == SB_IGNORE ) + break; + + if ( length == 0 ) { + //debug( REALM_INPUT, "exiting get data\n", length ); + break; + } + + buf = buf->next; + } + + return copied; +} + +/* + * Consume + */ + +static int input_consume_data( struct colm_program *prg, struct input_impl_seq *si, + int length, location_t *loc ) +{ + debug( prg, REALM_INPUT, "input_consume_data: stream %p consuming %d bytes\n", si, length ); + + int consumed = 0; + + /* Move over skip bytes. */ + while ( true ) { + struct seq_buf *buf = si->queue.head; + + if ( buf == 0 ) + break; + + if ( is_stream( buf ) ) { + struct stream_impl *sub = buf->si; + int slen = sub->funcs->consume_data( prg, sub, length, loc ); + //debug( REALM_INPUT, " got %d bytes from source\n", slen ); + + consumed += slen; + length -= slen; + } + else if ( buf->type == SB_TOKEN ) + break; + else if ( buf->type == SB_IGNORE ) + break; + else { + assert(false); + } + + if ( length == 0 ) { + //debug( REALM_INPUT, "exiting consume\n", length ); + break; + } + + struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); + input_stream_stash_head( prg, si, seq_buf ); + } + + return consumed; +} + +static int input_undo_consume_data( struct colm_program *prg, struct input_impl_seq *si, + const alph_t *data, int length ) +{ + /* When we push back data we need to move backwards through the block of + * text. The source stream type will */ + debug( prg, REALM_INPUT, "input_undo_consume_data: stream %p undoing consume of %d bytes\n", si, length ); + + assert( length > 0 ); + long tot = length; + int offset = 0; + int remaining = length; + + while ( true ) { + if ( is_stream( si->queue.head ) ) { + struct stream_impl *sub = si->queue.head->si; + int pushed_back = sub->funcs->undo_consume_data( prg, sub, data, remaining ); + remaining -= pushed_back; + offset += pushed_back; + + if ( remaining == 0 ) + break; + } + + struct seq_buf *b = input_stream_pop_stash( prg, si ); + input_stream_seq_prepend( si, b ); + } + + return tot; +} + +static tree_t *input_consume_tree( struct colm_program *prg, struct input_impl_seq *si ) +{ + debug( prg, REALM_INPUT, "input_consume_tree: stream %p\n", si ); + + while ( si->queue.head != 0 && is_stream( si->queue.head ) ) + { + debug( prg, REALM_INPUT, " stream %p consume: clearing source type\n", si ); + struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); + input_stream_stash_head( prg, si, seq_buf ); + } + + assert( si->queue.head != 0 && ( si->queue.head->type == SB_TOKEN || + si->queue.head->type == SB_IGNORE ) ); + + { + struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); + input_stream_stash_head( prg, si, seq_buf ); + tree_t *tree = seq_buf->tree; + debug( prg, REALM_INPUT, " stream %p consume: tree: %p\n", si, tree ); + return tree; + } + + return 0; +} + + +static void input_undo_consume_tree( struct colm_program *prg, struct input_impl_seq *si, + tree_t *tree, int ignore ) +{ + debug( prg, REALM_INPUT, "input_undo_consume_tree: stream %p undo " + "consume tree %p\n", si, tree ); + + while ( true ) { + debug( prg, REALM_INPUT, " stream %p consume: clearing source type\n", si ); + + struct seq_buf *b = input_stream_pop_stash( prg, si ); + input_stream_seq_prepend( si, b ); + + if ( is_tree( b ) ) { + assert( b->tree->id == tree->id ); + break; + } + } +} + +/* + * Prepend + */ +static void input_prepend_data( struct colm_program *prg, struct input_impl_seq *si, + struct colm_location *loc, const alph_t *data, long length ) +{ + debug( prg, REALM_INPUT, "input_prepend_data: stream %p prepend data length %d\n", si, length ); + + maybe_split( prg, si ); + + char *name = loc != 0 ? (char*)loc->name : ""; + struct stream_impl *sub_si = colm_impl_new_text( name, loc, data, length ); + + struct seq_buf *new_buf = new_seq_buf(); + new_buf->type = SB_ACCUM; + new_buf->si = sub_si; + new_buf->own_si = 1; + + input_stream_seq_prepend( si, new_buf ); +} + +static int input_undo_prepend_data( struct colm_program *prg, struct input_impl_seq *si, int length ) +{ + debug( prg, REALM_INPUT, "input_undo_prepend_data: stream %p undo " + "append data length %d\n", si, length ); + + struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); + free( seq_buf ); + + return 0; +} + +static void input_prepend_tree( struct colm_program *prg, struct input_impl_seq *si, + tree_t *tree, int ignore ) +{ + debug( prg, REALM_INPUT, "input_prepend_tree: stream %p prepend tree %p\n", si, tree ); + + maybe_split( prg, si ); + + /* Create a new buffer for the data. This is the easy implementation. + * Something better is needed here. It puts a max on the amount of + * data that can be pushed back to the inputStream. */ + struct seq_buf *new_buf = new_seq_buf(); + new_buf->type = ignore ? SB_IGNORE : SB_TOKEN; + new_buf->tree = tree; + input_stream_seq_prepend( si, new_buf ); +} + +static tree_t *input_undo_prepend_tree( struct colm_program *prg, struct input_impl_seq *si ) +{ + debug( prg, REALM_INPUT, "input_undo_prepend_tree: stream %p undo prepend tree\n", si ); + + assert( si->queue.head != 0 && ( si->queue.head->type == SB_TOKEN || + si->queue.head->type == SB_IGNORE ) ); + + struct seq_buf *seq_buf = input_stream_seq_pop_head( si ); + + tree_t *tree = seq_buf->tree; + free(seq_buf); + + debug( prg, REALM_INPUT, " stream %p tree %p\n", si, tree ); + + return tree; +} + + +static void input_prepend_stream( struct colm_program *prg, struct input_impl_seq *si, + struct colm_stream *stream ) +{ + maybe_split( prg, si ); + + /* Create a new buffer for the data. This is the easy implementation. + * Something better is needed here. It puts a max on the amount of + * data that can be pushed back to the inputStream. */ + struct seq_buf *new_buf = new_seq_buf(); + new_buf->type = SB_SOURCE; + new_buf->si = stream_to_impl( stream ); + input_stream_seq_prepend( si, new_buf ); + + assert( ((struct stream_impl_data*)new_buf->si)->type == 'D' ); +} + +static tree_t *input_undo_prepend_stream( struct colm_program *prg, struct input_impl_seq *is ) +{ + struct seq_buf *seq_buf = input_stream_seq_pop_head( is ); + free( seq_buf ); + return 0; +} + +static void input_append_data( struct colm_program *prg, struct input_impl_seq *si, + const alph_t *data, long length ) +{ + debug( prg, REALM_INPUT, "input_append_data: stream %p append data length %d\n", si, length ); + + if ( si->queue.tail == 0 || si->queue.tail->type != SB_ACCUM ) { + debug( prg, REALM_INPUT, "input_append_data: creating accum\n" ); + + struct stream_impl *sub_si = colm_impl_new_accum( "" ); + + struct seq_buf *new_buf = new_seq_buf(); + new_buf->type = SB_ACCUM; + new_buf->si = sub_si; + new_buf->own_si = 1; + + input_stream_seq_append( si, new_buf ); + } + + si->queue.tail->si->funcs->append_data( prg, si->queue.tail->si, data, length ); +} + +static tree_t *input_undo_append_data( struct colm_program *prg, struct input_impl_seq *si, int length ) +{ + debug( prg, REALM_INPUT, "input_undo_append_data: stream %p undo append data length %d\n", si, length ); + + while ( true ) { + struct seq_buf *buf = si->queue.tail; + + if ( buf == 0 ) + break; + + if ( is_stream( buf ) ) { + struct stream_impl *sub = buf->si; + int slen = sub->funcs->undo_append_data( prg, sub, length ); + //debug( REALM_INPUT, " got %d bytes from source\n", slen ); + //consumed += slen; + length -= slen; + } + else if ( buf->type == SB_TOKEN ) + break; + else if ( buf->type == SB_IGNORE ) + break; + else { + assert(false); + } + + if ( length == 0 ) { + //debug( REALM_INPUT, "exiting consume\n", length ); + break; + } + + struct seq_buf *seq_buf = input_stream_seq_pop_tail( si ); + free( seq_buf ); + } + return 0; +} + +static void input_append_tree( struct colm_program *prg, struct input_impl_seq *si, tree_t *tree ) +{ + debug( prg, REALM_INPUT, "input_append_tree: stream %p append tree %p\n", si, tree ); + + struct seq_buf *ad = new_seq_buf(); + + input_stream_seq_append( si, ad ); + + ad->type = SB_TOKEN; + ad->tree = tree; +} + +static tree_t *input_undo_append_tree( struct colm_program *prg, struct input_impl_seq *si ) +{ + debug( prg, REALM_INPUT, "input_undo_append_tree: stream %p undo append tree\n", si ); + + struct seq_buf *seq_buf = input_stream_seq_pop_tail( si ); + tree_t *tree = seq_buf->tree; + free( seq_buf ); + return tree; +} + +static void input_append_stream( struct colm_program *prg, struct input_impl_seq *si, + struct colm_stream *stream ) +{ + debug( prg, REALM_INPUT, "input_append_stream: stream %p append stream %p\n", si, stream ); + + struct seq_buf *ad = new_seq_buf(); + + input_stream_seq_append( si, ad ); + + ad->type = SB_SOURCE; + ad->si = stream_to_impl( stream ); + + assert( ((struct stream_impl_data*)ad->si)->type == 'D' ); +} + +static tree_t *input_undo_append_stream( struct colm_program *prg, struct input_impl_seq *si ) +{ + debug( prg, REALM_INPUT, "input_undo_append_stream: stream %p undo append stream\n", si ); + + struct seq_buf *seq_buf = input_stream_seq_pop_tail( si ); + free( seq_buf ); + return 0; +} + +struct input_funcs_seq input_funcs = +{ + &input_get_parse_block, + &input_get_data, + + /* Consume. */ + &input_consume_data, + &input_undo_consume_data, + + &input_consume_tree, + &input_undo_consume_tree, + + 0, /* consume_lang_el */ + 0, /* undo_consume_lang_el */ + + /* Prepend */ + &input_prepend_data, + &input_undo_prepend_data, + + &input_prepend_tree, + &input_undo_prepend_tree, + + &input_prepend_stream, + &input_undo_prepend_stream, + + /* Append */ + &input_append_data, + &input_undo_append_data, + + &input_append_tree, + &input_undo_append_tree, + + &input_append_stream, + &input_undo_append_stream, + + /* EOF */ + &input_set_eof_mark, + + &input_transfer_loc, + &input_destructor, + + /* Trimming */ + &input_get_option, + &input_set_option, +}; + +struct input_impl *colm_impl_new_generic( char *name ) +{ + struct input_impl_seq *ss = (struct input_impl_seq*)malloc(sizeof(struct input_impl_seq)); + init_input_impl_seq( ss, name ); + ss->funcs = (struct input_funcs*)&input_funcs; + return (struct input_impl*)ss; +} + +input_t *colm_input_new_struct( program_t *prg ) +{ + size_t memsize = sizeof(struct colm_input); + struct colm_input *input = (struct colm_input*) malloc( memsize ); + memset( input, 0, memsize ); + colm_struct_add( prg, (struct colm_struct *)input ); + input->id = prg->rtd->struct_input_id; + input->destructor = &colm_input_destroy; + return input; +} + +input_t *colm_input_new( program_t *prg ) +{ + struct input_impl *impl = colm_impl_new_generic( colm_filename_add( prg, "" ) ); + struct colm_input *input = colm_input_new_struct( prg ); + input->impl = impl; + return input; +} + +struct input_impl *input_to_impl( input_t *ptr ) +{ + return ptr->impl; +} -- cgit v1.2.1