summary | refs | log | tree | commit | diff
path: root/src/ctinput.cc
diff options
context:
space:
mode:
author Adrian Thurston <thurston@colm.net> 2020-03-14 15:29:52 +0200
committer Adrian Thurston <thurston@colm.net> 2020-03-14 15:29:52 +0200
commit f653735830d537715f2885bd832cf04851d35401 (patch)
tree 95e6551e39407543366d4f49aedf7b78c6e8bbe1 /src/ctinput.cc
parent bcc54d5df10cf425e7134b06f70d7ffe1abee4e4 (diff)
download colm-f653735830d537715f2885bd832cf04851d35401.tar.gz
moved source files into commit repository
Diffstat (limited to 'src/ctinput.cc')
-rw-r--r-- src/ctinput.cc 570
1 files changed, 570 insertions, 0 deletions
diff --git a/src/ctinput.cc b/src/ctinput.cc
new file mode 100644
index 00000000..f8267487
--- /dev/null
+++ b/src/ctinput.cc
@@ -0,0 +1,570 @@
+/*
+ * Copyright 2007-2018 Adrian Thurston <thurston@colm.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include <iostream>
+
+#include "compiler.h"
+#include "pool.h"
+//#include "debug.h"
+
+using std::cerr;
+using std::endl;
+
+/* Introduce the function-table type input_funcs_ct for struct input_impl_ct.
+ * NOTE(review): DEF_INPUT_FUNCS is defined outside this file; presumably it
+ * declares a table of function pointers typed for the given impl struct. */
+DEF_INPUT_FUNCS( input_funcs_ct, input_impl_ct );
+
+/* Function tables for pattern input and constructor input. Both are defined
+ * further down in this file. */
+extern input_funcs_ct pat_funcs;
+extern input_funcs_ct repl_funcs;
+
+/* State for an input that reads from the item list of a Pattern or a
+ * Constructor. Instances are created zero-filled by colm_impl_new_pat and
+ * colm_impl_new_cons. */
+struct input_impl_ct
+{
+ /* Function table; set to &pat_funcs or &repl_funcs on creation. */
+ struct input_funcs *funcs;
+
+ /* Location fields copied out verbatim by ct_transfer_loc_seq. */
+ char *name;
+ long line;
+ long column;
+ long byte;
+
+ /* For pattern input: the pattern and the current item in its list. A null
+  * pat_item is treated as the position after the list's tail. */
+ struct Pattern *pattern;
+ struct PatternItem *pat_item;
+ /* For constructor input: the constructor and the current item in its list,
+  * with the same null convention as pat_item. */
+ struct Constructor *constructor;
+ struct ConsItem *cons_item;
+
+ /* Flags stored by ct_set_eof_mark and ct_set_eof_sent. */
+ char eof_mark;
+ char eof_sent;
+
+ /* Byte offset into the current item's data. */
+ int offset;
+};
+
+/* Destructor entry used by both pat_funcs and repl_funcs. Currently a no-op:
+ * nothing reachable from ss is released here. */
+void ct_destructor( program_t *prg, tree_t **sp, struct input_impl_ct *ss )
+{
+	/* Nothing to do. */
+}
+
+/* Return the eof_sent flag stored on the input. prg is unused. */
+char ct_get_eof_sent( struct colm_program *prg, struct input_impl_ct *si )
+{
+	const char sent = si->eof_sent;
+	return sent;
+}
+
+/* Store eof_sent on the input. prg is unused. */
+void ct_set_eof_sent( struct colm_program *prg, struct input_impl_ct *si, char eof_sent )
+{
+	struct input_impl_ct *const impl = si;
+	impl->eof_sent = eof_sent;
+}
+
+/*
+ * Pattern
+ */
+
+/*
+ * Allocate a zero-filled input that reads the items of `pattern`, starting at
+ * the head of its list, and dispatches through pat_funcs.
+ *
+ * name:    accepted but not stored on the input.
+ * pattern: pattern to read from; pattern->list is dereferenced.
+ *
+ * Returns the new input cast to struct input_impl*. If the allocation fails
+ * the process is aborted instead of writing through a null pointer.
+ */
+struct input_impl *colm_impl_new_pat( char *name, Pattern *pattern )
+{
+	/* calloc supplies the zero fill previously done with malloc + memset. */
+	struct input_impl_ct *ss = (struct input_impl_ct*)calloc( 1, sizeof(struct input_impl_ct) );
+	if ( ss == 0 )
+		abort();
+
+	ss->pattern = pattern;
+	ss->pat_item = pattern->list->head;
+	ss->funcs = (struct input_funcs*)&pat_funcs;
+	return (struct input_impl*) ss;
+}
+
+/*
+ * Locate the next block of pattern text without consuming it.
+ *
+ * Walks forward from the current item and offset. *pskip bytes of text are
+ * passed over first; whole items that fit inside the skip are deducted from
+ * *pskip. On finding text beyond the skip, *pdp is pointed at it, *copied is
+ * set to its length and *pskip is zeroed.
+ *
+ * Returns INPUT_DATA when text was found, INPUT_LANG_EL when a TypeRefForm
+ * item is reached first, and INPUT_EOF when the list runs out. *copied is 0
+ * for the latter two. The input's own position is not modified.
+ */
+int pat_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss, int *pskip,
+		alph_t **pdp, int *copied )
+{
+	*copied = 0;
+
+	/* Only the first item is entered part-way; later ones start at zero. */
+	int start = ss->offset;
+
+	for ( PatternItem *item = ss->pat_item; item != 0; item = item->next, start = 0 ) {
+		if ( item->form == PatternItem::TypeRefForm )
+			return INPUT_LANG_EL;
+
+		assert ( item->form == PatternItem::InputTextForm );
+		const int remaining = item->data.length() - start;
+
+		/* Nothing left in this item, move on. */
+		if ( remaining <= 0 )
+			continue;
+
+		alph_t *text = (alph_t*)&item->data[start];
+
+		if ( *pskip > 0 && remaining <= *pskip ) {
+			/* Skipping the whole of this item's remaining text. */
+			*pskip -= remaining;
+			continue;
+		}
+
+		/* Skip is zero or shorter than what remains: hand back the rest. */
+		*pdp = text + *pskip;
+		*copied += remaining - *pskip;
+		*pskip = 0;
+		return INPUT_DATA;
+	}
+
+	return INPUT_EOF;
+}
+
+
+/*
+ * Copy up to `length` bytes of pattern text into dest without consuming it.
+ *
+ * Copying starts at the current item and offset and continues across
+ * consecutive text items. It stops when `length` bytes have been copied, the
+ * list ends, or a TypeRefForm item is reached.
+ *
+ * Returns the number of bytes written to dest.
+ */
+int pat_get_data( struct colm_program *prg, struct input_impl_ct *ss, alph_t *dest, int length )
+{
+	int total = 0;
+	int start = ss->offset;
+
+	for ( PatternItem *item = ss->pat_item; item != 0; item = item->next, start = 0 ) {
+		/* Language elements are never copied as data. */
+		if ( item->form == PatternItem::TypeRefForm )
+			break;
+
+		assert ( item->form == PatternItem::InputTextForm );
+		const int remaining = item->data.length() - start;
+
+		if ( remaining > 0 ) {
+			/* Take what this item has, capped at what is still wanted. */
+			char *from = &item->data[start];
+			int chunk = length;
+			if ( remaining <= length )
+				chunk = remaining;
+
+			memcpy( dest + total, from, chunk );
+			total += chunk;
+			length -= chunk;
+		}
+
+		if ( length == 0 )
+			break;
+	}
+
+	return total;
+}
+
+/* Step the current pattern item back by one. A null current item stands for
+ * the position after the end, so it steps back onto the list's tail. */
+void pat_backup( struct input_impl_ct *ss )
+{
+	ss->pat_item = ( ss->pat_item != 0 ) ?
+			ss->pat_item->prev :
+			ss->pattern->list->tail;
+}
+
+/*
+ * Advance the read position by up to `length` bytes.
+ *
+ * Items are stepped over whole while `length` covers what remains in them;
+ * otherwise the offset within the current item is moved forward. Stops early
+ * if the list runs out. loc is not used.
+ *
+ * Returns the number of bytes actually consumed.
+ */
+int pat_consume_data( struct colm_program *prg, struct input_impl_ct *ss, int length, location_t *loc )
+{
+	int total = 0;
+
+	while ( ss->pat_item != 0 ) {
+		const int remaining = ss->pat_item->data.length() - ss->offset;
+
+		if ( length < remaining ) {
+			/* The request ends inside this item. */
+			ss->offset += length;
+			total += length;
+			break;
+		}
+
+		/* Use up the rest of this item and move to the next one. */
+		ss->pat_item = ss->pat_item->next;
+		ss->offset = 0;
+
+		total += remaining;
+		length -= remaining;
+
+		if ( length == 0 )
+			break;
+	}
+
+	return total;
+}
+
+/*
+ * Move the read position backwards by `length` bytes.
+ *
+ * While more remains to undo than precedes the offset in the current item,
+ * the position steps back to the end of the previous item. A null current
+ * item stands for the position after the end, so it steps back onto the
+ * list's tail. `data` is not used.
+ *
+ * Undoing more than lies before the current position is a caller error; it
+ * is caught by an assert rather than dereferencing a null item.
+ *
+ * Returns `length` as passed in.
+ */
+int pat_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, const alph_t *data, int length )
+{
+	const int requested = length;
+
+	while ( length > ss->offset ) {
+		/* Everything before the offset in this item is undone. */
+		length -= ss->offset;
+
+		ss->pat_item = ss->pat_item == 0 ?
+				ss->pattern->list->tail :
+				ss->pat_item->prev;
+
+		/* Walked off the front of the list, or the list is empty. */
+		assert( ss->pat_item != 0 );
+		ss->offset = ss->pat_item->data.length();
+	}
+
+	/* The remainder fits inside the current item. */
+	ss->offset -= length;
+	return requested;
+}
+
+/*
+ * Consume the current pattern item as a language element.
+ *
+ * Writes the item's bindId to *bindId, sets *data to null and *length to 0,
+ * then advances to the next item with the offset reset.
+ *
+ * Returns the langEl of the consumed item's prodEl. The current item must be
+ * non-null.
+ */
+LangEl *pat_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss, long *bindId,
+		alph_t **data, long *length )
+{
+	PatternItem *item = ss->pat_item;
+	LangEl *const el = item->prodEl->langEl;
+
+	/* Pattern language elements carry a binding id but no token text. */
+	*bindId = item->bindId;
+	*length = 0;
+	*data = 0;
+
+	ss->offset = 0;
+	ss->pat_item = item->next;
+	return el;
+}
+
+/* Undo pat_consume_lang_el: step back one item (onto the tail when the
+ * current item is null) and set the offset to that item's data length. */
+void pat_undo_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss )
+{
+	PatternItem *prior = ( ss->pat_item == 0 ) ?
+			ss->pattern->list->tail :
+			ss->pat_item->prev;
+
+	ss->pat_item = prior;
+	ss->offset = prior->data.length();
+}
+
+/* Store eof_mark on the input. prg is unused. */
+void ct_set_eof_mark( struct colm_program *prg, struct input_impl_ct *si, char eof_mark )
+{
+	struct input_impl_ct *const impl = si;
+	impl->eof_mark = eof_mark;
+}
+
+/* Copy the input's name, line, column and byte fields into loc. */
+void ct_transfer_loc_seq( struct colm_program *prg, location_t *loc, struct input_impl_ct *ss )
+{
+	const struct input_impl_ct *from = ss;
+
+	loc->byte = from->byte;
+	loc->column = from->column;
+	loc->line = from->line;
+	loc->name = from->name;
+}
+
+/* Function table for pattern input, installed by colm_impl_new_pat. Entries
+ * are positional, so their order must match the input_funcs_ct layout. A 0
+ * entry leaves that operation unset for this input type. */
+input_funcs_ct pat_funcs =
+{
+ /* Peek at data without consuming it. */
+ &pat_get_parse_block,
+ &pat_get_data,
+
+ /* Consume text and undo it. */
+ &pat_consume_data,
+ &pat_undo_consume_data,
+
+ 0, /* consume_tree */
+ 0, /* undo_consume_tree */
+
+ /* Consume a language element and undo it. */
+ &pat_consume_lang_el,
+ &pat_undo_consume_lang_el,
+
+ 0, 0, 0, 0, 0, 0, /* prepend funcs. */
+ 0, 0, 0, 0, 0, 0, /* append funcs */
+
+ &ct_set_eof_mark,
+
+ &ct_transfer_loc_seq,
+ &ct_destructor,
+
+ /* NOTE(review): two trailing slots left unset; their names are not visible
+  * in this file. */
+ 0, 0
+};
+
+
+/*
+ * Replacements
+ */
+
+/*
+ * Allocate a zero-filled input that reads the items of `constructor`,
+ * starting at the head of its list, and dispatches through repl_funcs.
+ *
+ * name:        accepted but not stored on the input.
+ * constructor: constructor to read from; constructor->list is dereferenced.
+ *
+ * Returns the new input cast to struct input_impl*. If the allocation fails
+ * the process is aborted instead of writing through a null pointer.
+ */
+struct input_impl *colm_impl_new_cons( char *name, Constructor *constructor )
+{
+	/* calloc supplies the zero fill previously done with malloc + memset. */
+	struct input_impl_ct *ss = (struct input_impl_ct*)calloc( 1, sizeof(struct input_impl_ct) );
+	if ( ss == 0 )
+		abort();
+
+	ss->constructor = constructor;
+	ss->cons_item = constructor->list->head;
+	ss->funcs = (struct input_funcs*)&repl_funcs;
+	return (struct input_impl*)ss;
+}
+
+/*
+ * Consume the current constructor item as a language element.
+ *
+ * The element is the item's own langEl for ExprType items and the langEl of
+ * its prodEl otherwise. *bindId receives the item's bindId. For LiteralType
+ * items whose type reference has a pdaLiteral, the item's data is filled in
+ * by prepareLitString and returned through *data and *length; in every other
+ * case *data is null and *length is 0.
+ *
+ * Advances to the next item with the offset reset and returns the element.
+ * The current item must be non-null.
+ */
+LangEl *repl_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss,
+		long *bindId, alph_t **data, long *length )
+{
+	ConsItem *item = ss->cons_item;
+
+	LangEl *el;
+	if ( item->type == ConsItem::ExprType )
+		el = item->langEl;
+	else
+		el = item->prodEl->langEl;
+
+	*bindId = item->bindId;
+	*data = 0;
+	*length = 0;
+
+	if ( item->type == ConsItem::LiteralType &&
+			item->prodEl->typeRef->pdaLiteral != 0 )
+	{
+		/* The case-insensitivity result is not needed here. */
+		bool unusedCI;
+		prepareLitString( item->data, unusedCI,
+				item->prodEl->typeRef->pdaLiteral->data,
+				item->prodEl->typeRef->pdaLiteral->loc );
+
+		*data = (alph_t*)item->data.data;
+		*length = item->data.length();
+	}
+
+	ss->cons_item = item->next;
+	ss->offset = 0;
+	return el;
+}
+
+/*
+ * Locate the next block of constructor text without consuming it.
+ *
+ * Walks forward from the current item and offset. *pskip bytes of text are
+ * passed over first; whole items that fit inside the skip are deducted from
+ * *pskip. On finding text beyond the skip, *pdp is pointed at it, *copied is
+ * set to its length and *pskip is zeroed.
+ *
+ * Returns INPUT_DATA when text was found, INPUT_LANG_EL when an ExprType or
+ * LiteralType item is reached first, and INPUT_EOF when the list runs out.
+ * *copied is 0 for the latter two. The input's own position is not modified.
+ */
+int repl_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss,
+		int *pskip, alph_t **pdp, int *copied )
+{
+	*copied = 0;
+
+	/* Only the first item is entered part-way; later ones start at zero. */
+	int start = ss->offset;
+
+	for ( ConsItem *item = ss->cons_item; item != 0; item = item->next, start = 0 ) {
+		if ( item->type == ConsItem::ExprType || item->type == ConsItem::LiteralType )
+			return INPUT_LANG_EL;
+
+		assert ( item->type == ConsItem::InputText );
+		const int remaining = item->data.length() - start;
+
+		/* Nothing left in this item, move on. */
+		if ( remaining <= 0 )
+			continue;
+
+		alph_t *text = (alph_t*)&item->data[start];
+
+		if ( *pskip > 0 && remaining <= *pskip ) {
+			/* Skipping the whole of this item's remaining text. */
+			*pskip -= remaining;
+			continue;
+		}
+
+		/* Skip is zero or shorter than what remains: hand back the rest. */
+		*pdp = text + *pskip;
+		*copied += remaining - *pskip;
+		*pskip = 0;
+		return INPUT_DATA;
+	}
+
+	return INPUT_EOF;
+}
+
+/*
+ * Copy up to `length` bytes of constructor text into dest without consuming
+ * it.
+ *
+ * Copying starts at the current item and offset and continues across
+ * consecutive text items. It stops when `length` bytes have been copied, the
+ * list ends, or an ExprType or LiteralType item is reached.
+ *
+ * Returns the number of bytes written to dest.
+ */
+int repl_get_data( struct colm_program *prg, struct input_impl_ct *ss, alph_t *dest, int length )
+{
+	int total = 0;
+	int start = ss->offset;
+
+	for ( ConsItem *item = ss->cons_item; item != 0; item = item->next, start = 0 ) {
+		/* Language elements are never copied as data. */
+		if ( item->type == ConsItem::ExprType || item->type == ConsItem::LiteralType )
+			break;
+
+		assert ( item->type == ConsItem::InputText );
+		const int remaining = item->data.length() - start;
+
+		if ( remaining > 0 ) {
+			/* Take what this item has, capped at what is still wanted. */
+			char *from = &item->data[start];
+			int chunk = length;
+			if ( remaining <= length )
+				chunk = remaining;
+
+			memcpy( dest + total, from, chunk );
+			total += chunk;
+			length -= chunk;
+		}
+
+		if ( length == 0 )
+			break;
+	}
+
+	return total;
+}
+
+/* Step the current constructor item back by one. A null current item stands
+ * for the position after the end, so it steps back onto the list's tail. */
+void repl_backup( struct input_impl_ct *ss )
+{
+	ss->cons_item = ( ss->cons_item != 0 ) ?
+			ss->cons_item->prev :
+			ss->constructor->list->tail;
+}
+
+/* Undo repl_consume_lang_el: step back one item (onto the tail when the
+ * current item is null) and set the offset to that item's data length. */
+void repl_undo_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss )
+{
+	ConsItem *prior = ( ss->cons_item == 0 ) ?
+			ss->constructor->list->tail :
+			ss->cons_item->prev;
+
+	ss->cons_item = prior;
+	ss->offset = prior->data.length();
+}
+
+
+/*
+ * Advance the read position by up to `length` bytes.
+ *
+ * Items are stepped over whole while `length` covers what remains in them;
+ * otherwise the offset within the current item is moved forward. Stops early
+ * if the list runs out. loc is not used.
+ *
+ * Returns the number of bytes actually consumed.
+ */
+int repl_consume_data( struct colm_program *prg, struct input_impl_ct *ss, int length, location_t *loc )
+{
+	int total = 0;
+
+	while ( ss->cons_item != 0 ) {
+		const int remaining = ss->cons_item->data.length() - ss->offset;
+
+		if ( length < remaining ) {
+			/* The request ends inside this item. */
+			ss->offset += length;
+			total += length;
+			break;
+		}
+
+		/* Use up the rest of this item and move to the next constructor
+		 * item. */
+		ss->cons_item = ss->cons_item->next;
+		ss->offset = 0;
+
+		total += remaining;
+		length -= remaining;
+
+		if ( length == 0 )
+			break;
+	}
+
+	return total;
+}
+
+/*
+ * Move the read position backwards by `length` bytes.
+ *
+ * While more remains to undo than precedes the offset in the current item,
+ * the position steps back to the end of the previous item. A null current
+ * item stands for the position after the end, so it steps back onto the
+ * list's tail. `data` is not used.
+ *
+ * Undoing more than lies before the current position is a caller error; it
+ * is caught by an assert rather than dereferencing a null item.
+ *
+ * Returns `length` as passed in.
+ */
+int repl_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, const alph_t *data, int length )
+{
+	const int requested = length;
+
+	while ( length > ss->offset ) {
+		/* Everything before the offset in this item is undone. */
+		length -= ss->offset;
+
+		ss->cons_item = ss->cons_item == 0 ?
+				ss->constructor->list->tail :
+				ss->cons_item->prev;
+
+		/* Walked off the front of the list, or the list is empty. */
+		assert( ss->cons_item != 0 );
+		ss->offset = ss->cons_item->data.length();
+	}
+
+	/* The remainder fits inside the current item. */
+	ss->offset -= length;
+	return requested;
+}
+
+/* Function table for constructor input, installed by colm_impl_new_cons.
+ * Entries are positional, so their order must match the input_funcs_ct
+ * layout. A 0 entry leaves that operation unset for this input type. */
+input_funcs_ct repl_funcs =
+{
+ /* Peek at data without consuming it. */
+ &repl_get_parse_block,
+ &repl_get_data,
+
+ /* Consume text and undo it. */
+ &repl_consume_data,
+ &repl_undo_consume_data,
+
+ 0, /* consume_tree */
+ 0, /* undo_consume_tree. */
+
+ /* Consume a language element and undo it. */
+ &repl_consume_lang_el,
+ &repl_undo_consume_lang_el,
+
+ 0, 0, 0, 0, 0, 0, /* prepend. */
+ 0, 0, 0, 0, 0, 0, /* append. */
+
+ &ct_set_eof_mark,
+
+ &ct_transfer_loc_seq,
+ &ct_destructor,
+
+ /* NOTE(review): two trailing slots left unset; their names are not visible
+  * in this file. */
+ 0, 0
+};
+
+/* Push parseTree onto the parser run's bindings container. The caller
+ * decides whether the item is bound; no check is made here. */
+void pushBinding( pda_run *pdaRun, parse_tree_t *parseTree )
+{
+ /* If the item is bound then store it in the bindings array. */
+ pdaRun->bindings->push( parseTree );
+}
+
+/*
+ * Consume the next language element from the input and hand it to the parser
+ * run as its parse_input.
+ *
+ * The element is obtained through the input's consume_lang_el function. Any
+ * token text it returns is copied into a string head. A token is built for
+ * the element's id, the step count is incremented, and a parse tree node
+ * flagged PF_NAMED is allocated with the token as its shadow. When the
+ * element has a positive bind id the node is also pushed onto the bindings.
+ */
+extern "C" void internalSendNamedLangEl( program_t *prg, tree_t **sp,
+		struct pda_run *pdaRun, struct input_impl *is )
+{
+	/* Outputs of consume_lang_el. */
+	long bindId;
+	alph_t *text;
+	long textLen;
+
+	LangEl *el = is->funcs->consume_lang_el( prg, is, &bindId, &text, &textLen );
+
+	/* Copy the token data, if the element came with any. */
+	head_t *tokdata = ( text != 0 ) ?
+			string_alloc_full( prg, colm_cstr_from_alph( text ), textLen ) : 0;
+
+	kid_t *token = make_token_with_data( prg, pdaRun, is, el->id, tokdata );
+
+	colm_increment_steps( pdaRun );
+
+	parse_tree_t *node = parse_tree_allocate( pdaRun );
+	node->id = token->tree->id;
+	node->flags |= PF_NAMED;
+	node->shadow = token;
+
+	/* Bound items are recorded in the bindings array. */
+	if ( bindId > 0 )
+		pdaRun->bindings->push( node );
+
+	pdaRun->parse_input = node;
+}
+
+/* Create the parser run's bindings container and seed it with a null entry,
+ * so the first real binding lands at index 1. */
+extern "C" void internalInitBindings( pda_run *pdaRun )
+{
+ /* Bindings are indexed at 1. Need a no-binding. */
+ pdaRun->bindings = new bindings;
+ pdaRun->bindings->push(0);
+}
+
+/* Pop the most recent binding, but only if it is parseTree. Otherwise the
+ * bindings are left untouched. */
+extern "C" void internalPopBinding( pda_run *pdaRun, parse_tree_t *parseTree )
+{
+	if ( pdaRun->bindings->top() != parseTree )
+		return;
+
+	pdaRun->bindings->pop();
+}