summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@complang.org>2015-11-30 16:26:39 -0500
committerAdrian Thurston <thurston@complang.org>2015-11-30 16:26:39 -0500
commit5e75abcbc165994b9c28fe98c1944da4ce31a7cb (patch)
treeef1fc4a4c275b4fe01a3458c5b4da2e348a42940
parentdd66b443403b72ef2e2a076db471d6d819d95e05 (diff)
downloadcolm-5e75abcbc165994b9c28fe98c1944da4ce31a7cb.tar.gz
allow omission of location in reductions
There is code in here for omission of location and data in reductions. Unfortunately we cannot enable the omission of data unless we make changes to the backtracking. In the current implementation we push the token data with an mcopy from the data in the token.
-rw-r--r--src/colm.h5
-rw-r--r--src/compiler.cc4
-rw-r--r--src/compiler.h7
-rw-r--r--src/consinit.cc4
-rw-r--r--src/parsetree.h7
-rw-r--r--src/pdabuild.cc22
-rw-r--r--src/pdarun.c137
-rw-r--r--src/pdarun.h10
-rw-r--r--src/program.c2
-rw-r--r--src/program.h2
-rw-r--r--src/reduce.cc118
11 files changed, 276 insertions, 42 deletions
diff --git a/src/colm.h b/src/colm.h
index 05c4c5b8..99e8810e 100644
--- a/src/colm.h
+++ b/src/colm.h
@@ -77,6 +77,11 @@ struct colm_location *colm_find_location( struct colm_program *prg, struct colm_
#define COLM_DBG_INPUT 0x00000040
#define COLM_DBG_SCAN 0x00000080
+#define COLM_RN_NEITHER 0x00
+#define COLM_RN_DATA 0x01
+#define COLM_RN_LOC 0x02
+#define COLM_RN_BOTH 0x03
+
/*
* Primary Interface.
*/
diff --git a/src/compiler.cc b/src/compiler.cc
index 6ee0d396..20e48038 100644
--- a/src/compiler.cc
+++ b/src/compiler.cc
@@ -1179,8 +1179,8 @@ void Compiler::prepGrammar()
noUndefindLangEls();
/* Put the language elements in an index by language element id. */
- langElIndex = new LangEl*[nextSymbolId+1];
- memset( langElIndex, 0, sizeof(LangEl*)*(nextSymbolId+1) );
+ langElIndex = new LangEl*[nextLelId+1];
+ memset( langElIndex, 0, sizeof(LangEl*)*(nextLelId+1) );
for ( LelList::Iter lel = langEls; lel.lte(); lel++ )
langElIndex[lel->id] = lel;
diff --git a/src/compiler.h b/src/compiler.h
index 97ad0457..19ac9dc8 100644
--- a/src/compiler.h
+++ b/src/compiler.h
@@ -883,7 +883,7 @@ struct Compiler
Namespace *rootNamespace;
- int nextSymbolId;
+ int nextLelId;
int firstNonTermId;
LangEl **langElIndex;
@@ -1012,9 +1012,12 @@ struct Compiler
void declareReVars();
- void loadRefs( Production *production, const ReduceTextItemList &list );
+ void initReductionNeeds( Reduction *reduction );
+ void loadRefs( Reduction *reduction, Production *production,
+ const ReduceTextItemList &list );
void writeHostCall();
+ void writeNeeds();
void writeCommit();
void writeLhsRef( Production *production, ReduceTextItem *i );
void writeRhsRef( Production *production, ReduceTextItem *i );
diff --git a/src/consinit.cc b/src/consinit.cc
index c2e4b10b..6b0e45b4 100644
--- a/src/consinit.cc
+++ b/src/consinit.cc
@@ -44,6 +44,10 @@ extern "C" void commit_reduce_forward( program_t *prg, tree_t **root,
extern "C" long commit_union_sz( int reducer ) { return 0; }
+extern "C" void init_need() {}
+extern "C" int reducer_need_tok( program_t *prg, struct pda_run *, int id ) { return 3; }
+extern "C" int reducer_need_ign( program_t *prg, struct pda_run * ) { return 3; }
+
using std::cout;
using std::cerr;
using std::endl;
diff --git a/src/parsetree.h b/src/parsetree.h
index 5f5f10a5..d1ce46d7 100644
--- a/src/parsetree.h
+++ b/src/parsetree.h
@@ -981,7 +981,9 @@ typedef Vector<ReduceAction*> ReduceActionVect;
struct Reduction
{
Reduction( const InputLoc &loc, String name )
- : loc(loc), name(name)
+ :
+ loc(loc), name(name),
+ needData(0), needLoc(0)
{
static int nextId = 1;
id = nextId++;
@@ -994,6 +996,9 @@ struct Reduction
String var;
int id;
+ bool *needData;
+ bool *needLoc;
+
ReduceActionList reduceActions;
ReduceNonTermList reduceNonTerms;
};
diff --git a/src/pdabuild.cc b/src/pdabuild.cc
index 707f113d..a7a82df8 100644
--- a/src/pdabuild.cc
+++ b/src/pdabuild.cc
@@ -165,7 +165,7 @@ void Compiler::makeLangElIds()
{
/* The first id 0 is reserved for the stack sentinal. A negative id means
* error to the parsing function, inducing backtracking. */
- nextSymbolId = 1;
+ nextLelId = 1;
/* First pass assigns to the user terminals. */
for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
@@ -176,33 +176,33 @@ void Compiler::makeLangElIds()
lel != errorLangEl &&
lel != noTokenLangEl )
{
- lel->id = nextSymbolId++;
+ lel->id = nextLelId++;
}
}
- //eofLangEl->id = nextSymbolId++;
+ //eofLangEl->id = nextLelId++;
for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
/* Must be a term, and not any of the special reserved terminals.
* Remember if the non terminal is a user non terminal. */
if ( lel->isEOF )
- lel->id = nextSymbolId++;
+ lel->id = nextLelId++;
}
/* Next assign to the eof notoken, which we always create. */
- noTokenLangEl->id = nextSymbolId++;
+ noTokenLangEl->id = nextLelId++;
/* Possibly assign to the error language element. */
if ( errorLangEl != 0 )
- errorLangEl->id = nextSymbolId++;
+ errorLangEl->id = nextLelId++;
/* Save this for the code generation. */
- firstNonTermId = nextSymbolId;
+ firstNonTermId = nextLelId;
/* A third and final pass assigns to everything else. */
for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
/* Anything else not yet assigned gets assigned now. */
if ( lel->id < 0 )
- lel->id = nextSymbolId++;
+ lel->id = nextLelId++;
}
assert( ptrLangEl->id == LEL_ID_PTR );
@@ -980,7 +980,7 @@ again:
void Compiler::analyzeMachine( PdaGraph *pdaGraph, LangElSet &parserEls )
{
pdaGraph->maxState = pdaGraph->stateList.length() - 1;
- pdaGraph->maxLelId = nextSymbolId - 1;
+ pdaGraph->maxLelId = nextLelId - 1;
pdaGraph->maxOffset = pdaGraph->stateList.length() * pdaGraph->maxLelId;
for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
@@ -1435,12 +1435,12 @@ void Compiler::makeRuntimeData()
* lelInfo
*/
- count = nextSymbolId;
+ count = nextLelId;
runtimeData->lel_info = new lang_el_info[count];
runtimeData->num_lang_els = count;
memset( runtimeData->lel_info, 0, sizeof(struct lang_el_info)*count );
- for ( int i = 0; i < nextSymbolId; i++ ) {
+ for ( int i = 0; i < nextLelId; i++ ) {
LangEl *lel = langElIndex[i];
if ( lel != 0 ) {
runtimeData->lel_info[i].name = lel->fullLit;
diff --git a/src/pdarun.c b/src/pdarun.c
index 1ad3107f..91a0e8df 100644
--- a/src/pdarun.c
+++ b/src/pdarun.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
+ * Copyright 2007-2015 Adrian Thurston <thurston@complang.org>
*/
/* This file is part of Colm.
@@ -19,6 +19,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+
#include "config.h"
#include "debug.h"
#include "pdarun.h"
@@ -56,6 +57,13 @@
i = (tree_t*)w; \
} while(0)
+/* bit 0: data needed. bit 1: loc needed */
+#define RN_NONE 0x0
+#define RN_DATA 0x1
+#define RN_LOC 0x2
+#define RN_BOTH 0x3
+
+
static void init_fsm_run( program_t *prg, struct pda_run *pda_run )
{
pda_run->fsm_tables = prg->rtd->fsm_tables;
@@ -754,6 +762,87 @@ static head_t *extract_match( program_t *prg, tree_t **sp,
return head;
}
+static head_t *extract_no_d( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, struct stream_impl *is )
+{
+ long length = pda_run->toklen;
+
+ /* Just a consume, no data allocate. */
+ location_t *location = location_allocate( prg );
+ is->funcs->consume_data( prg, sp, is, length, location );
+
+ pda_run->p = pda_run->pe = 0;
+ pda_run->toklen = 0;
+ pda_run->tokstart = 0;
+
+ head_t *head = colm_string_alloc_pointer( prg, 0, 0 );
+
+ head->location = location;
+
+ debug( prg, REALM_PARSE, "location byte: %d\n", is->byte );
+
+ return head;
+}
+
+static head_t *extract_no_l( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, struct stream_impl *is )
+{
+ long length = pda_run->toklen;
+
+ //debug( prg, REALM_PARSE, "extracting token of length: %ld\n", length );
+
+ struct run_buf *run_buf = pda_run->consume_buf;
+ if ( run_buf == 0 || length > ( FSM_BUFSIZE - run_buf->length ) ) {
+ run_buf = new_run_buf( length );
+ run_buf->next = pda_run->consume_buf;
+ pda_run->consume_buf = run_buf;
+ }
+
+ char *dest = run_buf->data + run_buf->length;
+
+ is->funcs->get_data( is, dest, length );
+
+ /* Using a dummpy location. */
+ location_t location;
+ memset( &location, 0, sizeof( location ) );
+ is->funcs->consume_data( prg, sp, is, length, &location );
+
+ run_buf->length += length;
+
+ pda_run->p = pda_run->pe = 0;
+ pda_run->toklen = 0;
+ pda_run->tokstart = 0;
+
+ head_t *head = colm_string_alloc_pointer( prg, dest, length );
+
+ /* Don't pass the location. */
+ head->location = 0;
+
+ debug( prg, REALM_PARSE, "location byte: %d\n", is->byte );
+
+ return head;
+}
+
+static head_t *consume_match( program_t *prg, tree_t **sp,
+ struct pda_run *pda_run, struct stream_impl *is )
+{
+ long length = pda_run->toklen;
+
+ /* No data or location returned. We just consume the data. */
+ location_t dummy_loc;
+ memset( &dummy_loc, 0, sizeof(dummy_loc) );
+ is->funcs->consume_data( prg, sp, is, length, &dummy_loc );
+
+ pda_run->p = pda_run->pe = 0;
+ pda_run->toklen = 0;
+ pda_run->tokstart = 0;
+
+ debug( prg, REALM_PARSE, "location byte: %d\n", is->byte );
+
+ return 0;
+}
+
+
static head_t *peek_match( program_t *prg, struct pda_run *pda_run, struct stream_impl *is )
{
long length = pda_run->toklen;
@@ -788,20 +877,25 @@ static head_t *peek_match( program_t *prg, struct pda_run *pda_run, struct strea
static void send_ignore( program_t *prg, tree_t **sp,
struct pda_run *pda_run, struct stream_impl *is, long id )
{
- debug( prg, REALM_PARSE, "ignoring: %s\n", prg->rtd->lel_info[id].name );
+ if ( reducer_need_ign( prg, pda_run ) == RN_NONE ) {
+ consume_match( prg, sp, pda_run, is );
+ }
+ else {
+ debug( prg, REALM_PARSE, "ignoring: %s\n", prg->rtd->lel_info[id].name );
- /* Make the ignore string. */
- head_t *ignore_str = extract_match( prg, sp, pda_run, is );
+ /* Make the ignore string. */
+ head_t *ignore_str = extract_match( prg, sp, pda_run, is );
- debug( prg, REALM_PARSE, "ignoring: %.*s\n", ignore_str->length, ignore_str->data );
+ debug( prg, REALM_PARSE, "ignoring: %.*s\n", ignore_str->length, ignore_str->data );
- tree_t *tree = tree_allocate( prg );
- tree->refs = 1;
- tree->id = id;
- tree->tokdata = ignore_str;
+ tree_t *tree = tree_allocate( prg );
+ tree->refs = 1;
+ tree->id = id;
+ tree->tokdata = ignore_str;
- /* Send it to the pdaRun. */
- ignore_tree( prg, pda_run, tree );
+ /* Send it to the pdaRun. */
+ ignore_tree( prg, pda_run, tree );
+ }
}
static void send_token( program_t *prg, tree_t **sp,
@@ -810,7 +904,23 @@ static void send_token( program_t *prg, tree_t **sp,
int empty_ignore = pda_run->accum_ignore == 0;
/* Make the token data. */
- head_t *tokdata = extract_match( prg, sp, pda_run, is );
+ head_t *tokdata = 0;
+ int rn = reducer_need_tok( prg, pda_run, id );
+
+ switch ( rn ) {
+ case RN_NONE:
+ tokdata = consume_match( prg, sp, pda_run, is );
+ break;
+ case RN_DATA:
+ tokdata = extract_no_l( prg, sp, pda_run, is );
+ break;
+ case RN_LOC:
+ tokdata = extract_no_d( prg, sp, pda_run, is );
+ break;
+ case RN_BOTH:
+ tokdata = extract_match( prg, sp, pda_run, is );
+ break;
+ }
debug( prg, REALM_PARSE, "token: %s text: %.*s\n",
prg->rtd->lel_info[id].name,
@@ -831,7 +941,8 @@ static void send_token( program_t *prg, tree_t **sp,
set_region( pda_run, empty_ignore, parse_tree );
}
-static void send_tree( program_t *prg, tree_t **sp, struct pda_run *pda_run, struct stream_impl *is )
+static void send_tree( program_t *prg, tree_t **sp, struct pda_run *pda_run,
+ struct stream_impl *is )
{
kid_t *input = kid_allocate( prg );
input->tree = is->funcs->consume_tree( is );
diff --git a/src/pdarun.h b/src/pdarun.h
index 7af9ac0f..434f6631 100644
--- a/src/pdarun.h
+++ b/src/pdarun.h
@@ -454,13 +454,17 @@ long colm_parse_undo_frag( struct colm_program *prg, tree_t **sp, struct pda_run
void commit_clear_parse_tree( program_t *prg, tree_t **sp,
struct pda_run *pda_run, parse_tree_t *pt );
-void commit_reduce_forward( program_t *prg, tree_t **root,
- struct pda_run *pda_run, parse_tree_t *pt );
void commit_reduce( program_t *prg, tree_t **root,
struct pda_run *pda_run );
-long commit_union_sz( int reducer );
+/* Supplied by generated code. */
+void commit_reduce_forward( program_t *prg, tree_t **root,
+ struct pda_run *pda_run, parse_tree_t *pt );
+long commit_union_sz( int reducer );
+void init_need();
+int reducer_need_tok( program_t *prg, struct pda_run *pda_run, int id );
+int reducer_need_ign( program_t *prg, struct pda_run *pda_run );
#ifdef __cplusplus
}
diff --git a/src/program.c b/src/program.c
index 67a5f4bc..e09f7aa2 100644
--- a/src/program.c
+++ b/src/program.c
@@ -193,6 +193,8 @@ program_t *colm_new_program( struct colm_sections *rtd )
/* Allocate the VM stack. */
vm_init( prg );
+
+ init_need();
return prg;
}
diff --git a/src/program.h b/src/program.h
index c2c67d83..d39b4f4a 100644
--- a/src/program.h
+++ b/src/program.h
@@ -99,6 +99,8 @@ struct colm_sections
struct pda_run *pda_run, struct stream_impl *input_stream );
void (*init_bindings)( struct pda_run *pda_run );
void (*pop_binding)( struct pda_run *pda_run, parse_tree_t *tree );
+
+ void (*init_need)();
};
struct heap_list
diff --git a/src/reduce.cc b/src/reduce.cc
index fc6ef4b4..7088e3ed 100644
--- a/src/reduce.cc
+++ b/src/reduce.cc
@@ -50,11 +50,17 @@ void Compiler::writeCommitStub()
"}\n"
"\n"
"long commit_union_sz( int reducer ) { return 0; }\n"
+ "void init_need() {}\n"
+ "int reducer_need_tok( program_t *prg, "
+ "struct pda_run *pda_run, int id ) { return COLM_RN_BOTH; }\n"
+ "int reducer_need_ign( program_t *prg, "
+ "struct pda_run *pda_run ) { return COLM_RN_BOTH; }\n"
"\n";
;
}
-void Compiler::loadRefs( Production *production, const ReduceTextItemList &list )
+void Compiler::loadRefs( Reduction *reduction, Production *production,
+ const ReduceTextItemList &list )
{
ObjectDef *objectDef = production->prodName->objectDef;
Vector<ProdEl*> rhsUsed;
@@ -107,7 +113,8 @@ void Compiler::loadRefs( Production *production, const ReduceTextItemList &list
}
/*
- * In the first pass we
+ * In the first pass we load using a parse tree cursor. This is for
+ * nonterms.
*/
bool useCursor = false;
for ( Vector<ProdEl*>::Iter rhs = rhsUsed; rhs.lte(); rhs++ ) {
@@ -153,6 +160,10 @@ void Compiler::loadRefs( Production *production, const ReduceTextItemList &list
}
}
+ /* In the second pass we load using a tree cursor. This is for token data
+ * and locations.
+ */
+
useCursor = false;
for ( Vector<ProdEl*>::Iter rhs = rhsUsed; rhs.lte(); rhs++ ) {
if ( *rhs != 0 && (*rhs)->production == production &&
@@ -180,20 +191,23 @@ void Compiler::loadRefs( Production *production, const ReduceTextItemList &list
Vector<ProdEl*>::Iter loc = locUsed;
for ( ; rhs.lte(); rhs++, loc++ ) {
- ProdEl *prodEl = *rhs;
- if ( prodEl != 0 ) {
- while ( cursorPos < rhs.pos() ) {
- *outStream <<
- " _tree_cursor = _tree_cursor->next;\n";
- cursorPos += 1;
- }
+ ProdEl *prodEl = *rhs;
+ if ( prodEl != 0 ) {
if ( prodEl->production == production ) {
if ( prodEl->langEl->type == LangEl::Term ) {
+
+ while ( cursorPos < rhs.pos() ) {
+ *outStream <<
+ " _tree_cursor = _tree_cursor->next;\n";
+ cursorPos += 1;
+ }
*outStream <<
" colm_data *_rhs" << rhs.pos() << " = "
"_tree_cursor->tree->tokdata;\n";
+
+ reduction->needData[prodEl->langEl->id] = true;
}
}
@@ -202,9 +216,18 @@ void Compiler::loadRefs( Production *production, const ReduceTextItemList &list
ProdEl *locEl = *loc;
if ( locEl != 0 ) {
if ( locEl->production == production ) {
+
+ while ( cursorPos < rhs.pos() ) {
+ *outStream <<
+ " _tree_cursor = _tree_cursor->next;\n";
+ cursorPos += 1;
+ }
+
*outStream <<
" colm_location *_loc" << loc.pos() << " = "
"colm_find_location( prg, _tree_cursor->tree );\n";
+
+ reduction->needLoc[locEl->langEl->id] = true;
}
}
}
@@ -300,6 +323,78 @@ struct CmpReduceAction
}
};
+void Compiler::initReductionNeeds( Reduction *reduction )
+{
+ reduction->needData = new bool[nextLelId];
+ reduction->needLoc = new bool[nextLelId];
+ memset( reduction->needData, 0, sizeof(bool)*nextLelId );
+ memset( reduction->needLoc, 0, sizeof(bool)*nextLelId );
+}
+
+void Compiler::writeNeeds()
+{
+ *outStream <<
+ "struct reduction_info\n"
+ "{\n"
+ " unsigned char need_data[" << nextLelId << "];\n"
+ " unsigned char need_loc[" << nextLelId << "];\n"
+ "};\n"
+ "\n";
+
+ *outStream <<
+ "struct reduction_info ri[" << rootNamespace->reductions.length() + 1 << "];\n"
+ "\n";
+
+ *outStream <<
+ "extern \"C\" void init_need()\n"
+ "{\n";
+
+ for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) {
+ Reduction *reduction = *r;
+ *outStream <<
+ " memset( ri[" << reduction->id << "]"
+ ".need_data, 0, sizeof(unsigned char) * " << nextLelId << " );\n"
+ " memset( ri[" << reduction->id << "]"
+ ".need_loc, 0, sizeof(unsigned char) * " << nextLelId << " );\n";
+
+ for ( int i = 0; i < nextLelId; i++ ) {
+ if ( reduction->needData[i] ) {
+ *outStream <<
+ " ri[" << reduction->id << "].need_data[" << i << "] = COLM_RN_DATA;\n";
+ }
+
+ if ( reduction->needLoc[i] ) {
+ *outStream <<
+ " ri[" << reduction->id << "].need_loc[" << i << "] = COLM_RN_LOC;\n";
+ }
+ }
+ }
+
+ *outStream <<
+ "}\n";
+
+ *outStream <<
+ "extern \"C\" int reducer_need_tok( program_t *prg, "
+ "struct pda_run *pda_run, int id )\n"
+ "{\n"
+ " if ( pda_run->reducer > 0 ) {\n"
+ /* Note we are forcing the reducer need for data. Enabling requires finding
+ * a solution for backtracking push. */
+ " return COLM_RN_DATA | ri[pda_run->reducer].need_data[id] | \n"
+ " ri[pda_run->reducer].need_loc[id];\n"
+ " }\n"
+ " return COLM_RN_BOTH;\n"
+ "}\n"
+ "\n"
+ "extern \"C\" int reducer_need_ign( program_t *prg, struct pda_run *pda_run )\n"
+ "{\n"
+ // Using this requires finding a solution for backtracking push back.
+ //" if ( pda_run->reducer > 0 )\n"
+ //" return COLM_RN_NEITHER;\n"
+ " return COLM_RN_BOTH;\n"
+ "}\n";
+}
+
void Compiler::writeCommit()
{
*outStream <<
@@ -386,6 +481,7 @@ void Compiler::writeCommit()
for ( ReductionVect::Iter r = rootNamespace->reductions; r.lte(); r++ ) {
Reduction *reduction = *r;
+ initReductionNeeds( reduction );
*outStream <<
"void " << reduction->name << "::commit_reduce_forward( program_t *prg, \n"
@@ -463,7 +559,7 @@ void Compiler::writeCommit()
*outStream <<
" if ( kid->tree->prod_num == " << prodNum << " ) {\n";
- loadRefs( action->production, action->itemList );
+ loadRefs( reduction, action->production, action->itemList );
*outStream <<
"#line " << action->loc.line << "\"" << action->loc.fileName << "\"\n";
@@ -496,4 +592,6 @@ void Compiler::writeCommit()
"}\n"
"\n";
}
+
+ writeNeeds();
}