summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--colm/bytecode.c31
-rw-r--r--colm/colm.h4
-rw-r--r--colm/compiler.cc8
-rw-r--r--colm/compiler.h2
-rw-r--r--colm/ctinput.cc27
-rw-r--r--colm/input.c10
-rw-r--r--colm/input.h30
-rw-r--r--colm/keyops.h8
-rw-r--r--colm/main.cc13
-rw-r--r--colm/pdacodegen.cc2
-rw-r--r--colm/pdarun.c32
-rw-r--r--colm/pdarun.h8
-rw-r--r--colm/print.c11
-rw-r--r--colm/stream.c28
-rw-r--r--colm/string.c4
15 files changed, 122 insertions, 96 deletions
diff --git a/colm/bytecode.c b/colm/bytecode.c
index 155a6a25..af50f679 100644
--- a/colm/bytecode.c
+++ b/colm/bytecode.c
@@ -35,6 +35,7 @@
#include <colm/pool.h>
#include <colm/debug.h>
+#include <colm/colm.h>
#define TRUE_VAL 1
#define FALSE_VAL 0
@@ -215,7 +216,7 @@ static word_t stream_append_text( program_t *prg, tree_t **sp, input_t *dest, tr
colm_print_tree_collect( prg, sp, &collect, input, trim );
/* Load it into the input. */
- impl->funcs->append_data( prg, impl, collect.data, collect.length );
+ impl->funcs->append_data( prg, impl, colm_alph_from_cstr( collect.data ), collect.length );
length = collect.length;
str_collect_destroy( &collect );
}
@@ -238,7 +239,7 @@ static word_t stream_append_tree( program_t *prg, tree_t **sp, input_t *dest, tr
colm_print_tree_collect( prg, sp, &collect, to_append, false );
/* Load it into the to_append. */
- impl->funcs->append_data( prg, impl, collect.data, collect.length );
+ impl->funcs->append_data( prg, impl, colm_alph_from_cstr( collect.data ), collect.length );
length = collect.length;
str_collect_destroy( &collect );
}
@@ -288,10 +289,11 @@ static tree_t *stream_pull_bc( program_t *prg, tree_t **sp, struct pda_run *pda_
}
-static void undo_stream_pull( struct colm_program *prg, struct input_impl *is, const char *data, long length )
+static void undo_stream_pull( struct colm_program *prg, struct input_impl *is,
+ const char *data, long length )
{
//debug( REALM_PARSE, "undoing stream pull\n" );
- is->funcs->undo_consume_data( prg, is, data, length );
+ is->funcs->undo_consume_data( prg, is, colm_alph_from_cstr( data ), length );
}
static void undo_pull( program_t *prg, input_t *input, tree_t *str )
@@ -407,12 +409,14 @@ static void downref_locals( program_t *prg, tree_t ***psp,
}
}
+
static tree_t *construct_arg0( program_t *prg, int argc, const char **argv, const int *argl )
{
tree_t *arg0 = 0;
if ( argc > 0 ) {
- size_t len = argl != 0 ? argl[0] : strlen(argv[0]);
- head_t *head = colm_string_alloc_pointer( prg, argv[0], len );
+ const char *argv0 = argv[0];
+ size_t len = argl != 0 ? argl[0] : strlen( argv[0] );
+ head_t *head = colm_string_alloc_pointer( prg, argv0, len );
arg0 = construct_string( prg, head );
colm_tree_upref( prg, arg0 );
}
@@ -425,7 +429,8 @@ static list_t *construct_argv( program_t *prg, int argc, const char **argv, cons
int i;
for ( i = 1; i < argc; i++ ) {
size_t len = argl != 0 ? argl[i] : strlen(argv[i]);
- head_t *head = colm_string_alloc_pointer( prg, argv[i], len );
+ const char *argv_i = argv[i];
+ head_t *head = colm_string_alloc_pointer( prg, argv_i, len );
tree_t *arg = construct_string( prg, head );
colm_tree_upref( prg, arg );
@@ -590,7 +595,9 @@ tree_t *colm_run_func( struct colm_program *prg, int frame_id,
((value_t*)execution.call_args)[p] = 0;
}
else {
- head_t *head = colm_string_alloc_pointer( prg, params[p], strlen(params[p]) );
+ const char *param_p = params[p];
+ size_t param_len = strlen(params[p]);
+ head_t *head = colm_string_alloc_pointer( prg, param_p, param_len );
tree_t *tree = construct_string( prg, head );
colm_tree_upref( prg, tree );
((tree_t**)execution.call_args)[p] = tree;
@@ -966,13 +973,14 @@ again:
* the local frame now. */
struct lang_el_info *lel_info = prg->rtd->lel_info;
struct pda_run *pda_run = exec->parser->pda_run;
- char **mark = pda_run->mark;
+ alph_t **mark = pda_run->mark;
int i, num_capture_attr = lel_info[pda_run->token_id].num_capture_attr;
for ( i = 0; i < num_capture_attr; i++ ) {
struct lang_el_info *lei = &lel_info[exec->parser->pda_run->token_id];
CaptureAttr *ca = &prg->rtd->capture_attr[lei->capture_attr + i];
- head_t *data = string_alloc_full( prg, mark[ca->mark_enter],
+ head_t *data = string_alloc_full( prg,
+ colm_cstr_from_alph( mark[ca->mark_enter] ),
mark[ca->mark_leave] - mark[ca->mark_enter] );
tree_t *string = construct_string( prg, data );
colm_tree_upref( prg, string );
@@ -3186,7 +3194,8 @@ again:
tree_t *str = 0;
if ( tree->tokdata->location ) {
const char *fn = tree->tokdata->location->name;
- head_t *data = string_alloc_full( prg, fn, strlen(fn) );
+ size_t fnlen = strlen( fn );
+ head_t *data = string_alloc_full( prg, fn, fnlen );
str = construct_string( prg, data );
colm_tree_upref( prg, str );
}
diff --git a/colm/colm.h b/colm/colm.h
index 5091d27e..39506cab 100644
--- a/colm/colm.h
+++ b/colm/colm.h
@@ -45,6 +45,7 @@ struct indent_impl
extern struct colm_sections colm_object;
typedef unsigned long colm_value_t;
+typedef unsigned char colm_alph_t;
struct colm_tree
{
@@ -99,6 +100,9 @@ struct colm_tree *colm_get_left_repeat_next( struct colm_tree *tree );
struct colm_tree *colm_get_left_repeat_val( struct colm_tree *tree );
struct colm_location *colm_find_location( struct colm_program *prg, struct colm_tree *tree );
+static inline const colm_alph_t *colm_alph_from_cstr( const char *cstr ) { return (const colm_alph_t*)cstr; }
+static inline const char *colm_cstr_from_alph( const colm_alph_t *alph ) { return (const char*)alph; }
+
/* Debug realms. To turn on, pass to colm_set_debug before invocation. */
#define COLM_DBG_BYTECODE 0x00000001
#define COLM_DBG_PARSE 0x00000002
diff --git a/colm/compiler.cc b/colm/compiler.cc
index 6b2b1032..72f87dac 100644
--- a/colm/compiler.cc
+++ b/colm/compiler.cc
@@ -86,12 +86,13 @@ Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd )
unsigned long ul = strtoul( str, 0, 16 );
+
if ( errno == ERANGE || (unusedBits && ul >> (size * 8)) ) {
error(loc) << "literal " << str << " overflows the alphabet type" << endl;
ul = 1 << (size * 8);
}
- if ( unusedBits && ul >> (size * 8 - 1) )
+ if ( keyOps->alphType->isSigned && unusedBits && ul >> (size * 8 - 1) )
ul |= (ULONG_MAX >> (size*8 ) ) << (size*8);
return Key( (long)ul );
@@ -492,7 +493,8 @@ void Compiler::initGraphDict( )
void Compiler::initKeyOps( )
{
/* Signedness and bounds. */
- HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
+ const HostType *alphType = alphTypeSet ? userAlphType :
+ &hostLang->hostTypes[hostLang->defaultHostType];
thisKeyOps.setAlphType( alphType );
if ( lowerNum != 0 ) {
@@ -1022,7 +1024,7 @@ pda_run *Compiler::parsePattern( program_t *prg, tree_t **sp, const InputLoc &lo
if ( pdaRun->parse_error_text != 0 ) {
colm_data *tokdata = pdaRun->parse_error_text->tokdata;
cerr << ": relative error: ";
- cerr.write( tokdata->data, tokdata->length );
+ cerr.write( (const char*)tokdata->data, tokdata->length );
}
else {
cerr << ": parse error";
diff --git a/colm/compiler.h b/colm/compiler.h
index f22b33e3..67d5b40e 100644
--- a/colm/compiler.h
+++ b/colm/compiler.h
@@ -596,7 +596,7 @@ struct Compiler
int nextPriorKey, nextNameId;
/* Alphabet type. */
- HostType *userAlphType;
+ const HostType *userAlphType;
bool alphTypeSet;
/* Element type and get key expression. */
diff --git a/colm/ctinput.cc b/colm/ctinput.cc
index efc2c69d..f8267487 100644
--- a/colm/ctinput.cc
+++ b/colm/ctinput.cc
@@ -88,7 +88,7 @@ struct input_impl *colm_impl_new_pat( char *name, Pattern *pattern )
}
int pat_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss, int *pskip,
- char **pdp, int *copied )
+ alph_t **pdp, int *copied )
{
*copied = 0;
@@ -107,7 +107,7 @@ int pat_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss, int
if ( avail > 0 ) {
/* The source data from the current buffer. */
- char *src = &buf->data[offset];
+ alph_t *src = (alph_t*)&buf->data[offset];
int slen = avail;
/* Need to skip? */
@@ -136,7 +136,7 @@ int pat_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss, int
}
-int pat_get_data( struct colm_program *prg, struct input_impl_ct *ss, char *dest, int length )
+int pat_get_data( struct colm_program *prg, struct input_impl_ct *ss, alph_t *dest, int length )
{
int copied = 0;
@@ -215,7 +215,7 @@ int pat_consume_data( struct colm_program *prg, struct input_impl_ct *ss, int le
return consumed;
}
-int pat_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, const char *data, int length )
+int pat_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, const alph_t *data, int length )
{
int origLen = length;
while ( true ) {
@@ -239,7 +239,7 @@ int pat_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, c
}
LangEl *pat_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss, long *bindId,
- char **data, long *length )
+ alph_t **data, long *length )
{
LangEl *klangEl = ss->pat_item->prodEl->langEl;
*bindId = ss->pat_item->bindId;
@@ -310,7 +310,8 @@ struct input_impl *colm_impl_new_cons( char *name, Constructor *constructor )
return (struct input_impl*)ss;
}
-LangEl *repl_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss, long *bindId, char **data, long *length )
+LangEl *repl_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss,
+ long *bindId, alph_t **data, long *length )
{
LangEl *klangEl = ss->cons_item->type == ConsItem::ExprType ?
ss->cons_item->langEl : ss->cons_item->prodEl->langEl;
@@ -326,7 +327,7 @@ LangEl *repl_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss
ss->cons_item->prodEl->typeRef->pdaLiteral->data,
ss->cons_item->prodEl->typeRef->pdaLiteral->loc );
- *data = ss->cons_item->data;
+ *data = (alph_t*)ss->cons_item->data.data;
*length = ss->cons_item->data.length();
}
}
@@ -337,7 +338,7 @@ LangEl *repl_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss
}
int repl_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss,
- int *pskip, char **pdp, int *copied )
+ int *pskip, alph_t **pdp, int *copied )
{
*copied = 0;
@@ -356,7 +357,7 @@ int repl_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss,
if ( avail > 0 ) {
/* The source data from the current buffer. */
- char *src = &buf->data[offset];
+ alph_t *src = (alph_t*)&buf->data[offset];
int slen = avail;
/* Need to skip? */
@@ -384,7 +385,7 @@ int repl_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss,
return INPUT_DATA;
}
-int repl_get_data( struct colm_program *prg, struct input_impl_ct *ss, char *dest, int length )
+int repl_get_data( struct colm_program *prg, struct input_impl_ct *ss, alph_t *dest, int length )
{
int copied = 0;
@@ -468,7 +469,7 @@ int repl_consume_data( struct colm_program *prg, struct input_impl_ct *ss, int l
return consumed;
}
-int repl_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, const char *data, int length )
+int repl_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, const alph_t *data, int length )
{
int origLen = length;
while ( true ) {
@@ -527,7 +528,7 @@ extern "C" void internalSendNamedLangEl( program_t *prg, tree_t **sp,
{
/* All three set by consumeLangEl. */
long bindId;
- char *data;
+ alph_t *data;
long length;
LangEl *klangEl = is->funcs->consume_lang_el( prg, is, &bindId, &data, &length );
@@ -537,7 +538,7 @@ extern "C" void internalSendNamedLangEl( program_t *prg, tree_t **sp,
/* Copy the token data. */
head_t *tokdata = 0;
if ( data != 0 )
- tokdata = string_alloc_full( prg, data, length );
+ tokdata = string_alloc_full( prg, colm_cstr_from_alph( data ), length );
kid_t *input = make_token_with_data( prg, pdaRun, is, klangEl->id, tokdata );
diff --git a/colm/input.c b/colm/input.c
index b8101c6f..4342249a 100644
--- a/colm/input.c
+++ b/colm/input.c
@@ -253,7 +253,7 @@ static void input_set_option( struct colm_program *prg, struct input_impl_seq *i
static int input_get_parse_block( struct colm_program *prg, struct input_impl_seq *is,
- int *pskip, char **pdp, int *copied )
+ int *pskip, alph_t **pdp, int *copied )
{
int ret = 0;
*copied = 0;
@@ -326,7 +326,7 @@ static int input_get_parse_block( struct colm_program *prg, struct input_impl_se
}
static int input_get_data( struct colm_program *prg, struct input_impl_seq *is,
- char *dest, int length )
+ alph_t *dest, int length )
{
int copied = 0;
@@ -414,7 +414,7 @@ static int input_consume_data( struct colm_program *prg, struct input_impl_seq *
}
static int input_undo_consume_data( struct colm_program *prg, struct input_impl_seq *si,
- const char *data, int length )
+ const alph_t *data, int length )
{
/* When we push back data we need to move backwards through the block of
* text. The source stream type will */
@@ -492,7 +492,7 @@ static void input_undo_consume_tree( struct colm_program *prg, struct input_impl
* Prepend
*/
static void input_prepend_data( struct colm_program *prg, struct input_impl_seq *si,
- const char *data, long length )
+ const alph_t *data, long length )
{
debug( prg, REALM_INPUT, "input_prepend_data: stream %p prepend data length %d\n", si, length );
@@ -577,7 +577,7 @@ static tree_t *input_undo_prepend_stream( struct colm_program *prg, struct input
}
static void input_append_data( struct colm_program *prg, struct input_impl_seq *si,
- const char *data, long length )
+ const alph_t *data, long length )
{
debug( prg, REALM_INPUT, "input_append_data: stream %p append data length %d\n", si, length );
diff --git a/colm/input.h b/colm/input.h
index f116561f..c0a896d5 100644
--- a/colm/input.h
+++ b/colm/input.h
@@ -54,24 +54,26 @@ struct colm_stream;
struct input_impl;
struct stream_impl;
+typedef colm_alph_t alph_t;
+
#define DEF_INPUT_FUNCS( input_funcs, _input_impl ) \
struct input_funcs \
{ \
- int (*get_parse_block)( struct colm_program *prg, struct _input_impl *si, int *pskip, char **pdp, int *copied ); \
- int (*get_data)( struct colm_program *prg, struct _input_impl *si, char *dest, int length ); \
+ int (*get_parse_block)( struct colm_program *prg, struct _input_impl *si, int *pskip, alph_t **pdp, int *copied ); \
+ int (*get_data)( struct colm_program *prg, struct _input_impl *si, alph_t *dest, int length ); \
int (*consume_data)( struct colm_program *prg, struct _input_impl *si, int length, struct colm_location *loc ); \
- int (*undo_consume_data)( struct colm_program *prg, struct _input_impl *si, const char *data, int length ); \
+ int (*undo_consume_data)( struct colm_program *prg, struct _input_impl *si, const alph_t *data, int length ); \
struct colm_tree *(*consume_tree)( struct colm_program *prg, struct _input_impl *si ); \
void (*undo_consume_tree)( struct colm_program *prg, struct _input_impl *si, struct colm_tree *tree, int ignore ); \
- struct LangEl *(*consume_lang_el)( struct colm_program *prg, struct _input_impl *si, long *bind_id, char **data, long *length ); \
+ struct LangEl *(*consume_lang_el)( struct colm_program *prg, struct _input_impl *si, long *bind_id, alph_t **data, long *length ); \
void (*undo_consume_lang_el)( struct colm_program *prg, struct _input_impl *si ); \
- void (*prepend_data)( struct colm_program *prg, struct _input_impl *si, const char *data, long len ); \
+ void (*prepend_data)( struct colm_program *prg, struct _input_impl *si, const alph_t *data, long len ); \
int (*undo_prepend_data)( struct colm_program *prg, struct _input_impl *si, int length ); \
void (*prepend_tree)( struct colm_program *prg, struct _input_impl *si, struct colm_tree *tree, int ignore ); \
struct colm_tree *(*undo_prepend_tree)( struct colm_program *prg, struct _input_impl *si ); \
void (*prepend_stream)( struct colm_program *prg, struct _input_impl *si, struct colm_stream *stream ); \
struct colm_tree *(*undo_prepend_stream)( struct colm_program *prg, struct _input_impl *si ); \
- void (*append_data)( struct colm_program *prg, struct _input_impl *si, const char *data, long length ); \
+ void (*append_data)( struct colm_program *prg, struct _input_impl *si, const alph_t *data, long length ); \
struct colm_tree *(*undo_append_data)( struct colm_program *prg, struct _input_impl *si, int length ); \
void (*append_tree)( struct colm_program *prg, struct _input_impl *si, struct colm_tree *tree ); \
struct colm_tree *(*undo_append_tree)( struct colm_program *prg, struct _input_impl *si ); \
@@ -87,11 +89,11 @@ struct input_funcs \
#define DEF_STREAM_FUNCS( stream_funcs, _stream_impl ) \
struct stream_funcs \
{ \
- int (*get_parse_block)( struct colm_program *prg, struct _stream_impl *si, int *pskip, char **pdp, int *copied ); \
- int (*get_data)( struct colm_program *prg, struct _stream_impl *si, char *dest, int length ); \
- int (*get_data_source)( struct colm_program *prg, struct _stream_impl *si, char *dest, int length ); \
+ int (*get_parse_block)( struct colm_program *prg, struct _stream_impl *si, int *pskip, alph_t **pdp, int *copied ); \
+ int (*get_data)( struct colm_program *prg, struct _stream_impl *si, alph_t *dest, int length ); \
+ int (*get_data_source)( struct colm_program *prg, struct _stream_impl *si, alph_t *dest, int length ); \
int (*consume_data)( struct colm_program *prg, struct _stream_impl *si, int length, struct colm_location *loc ); \
- int (*undo_consume_data)( struct colm_program *prg, struct _stream_impl *si, const char *data, int length ); \
+ int (*undo_consume_data)( struct colm_program *prg, struct _stream_impl *si, const alph_t *data, int length ); \
void (*transfer_loc)( struct colm_program *prg, struct colm_location *loc, struct _stream_impl *si ); \
struct colm_str_collect *(*get_collect)( struct colm_program *prg, struct _stream_impl *si ); \
void (*flush_stream)( struct colm_program *prg, struct _stream_impl *si ); \
@@ -99,7 +101,7 @@ struct stream_funcs \
void (*print_tree)( struct colm_program *prg, struct colm_tree **sp, \
struct _stream_impl *impl, struct colm_tree *tree, int trim ); \
struct stream_impl *(*split_consumed)( struct colm_program *prg, struct _stream_impl *si ); \
- int (*append_data)( struct colm_program *prg, struct _stream_impl *si, const char *data, int len ); \
+ int (*append_data)( struct colm_program *prg, struct _stream_impl *si, const alph_t *data, int len ); \
int (*undo_append_data)( struct colm_program *prg, struct _stream_impl *si, int length ); \
void (*destructor)( struct colm_program *prg, struct colm_tree **sp, struct _stream_impl *si ); \
int (*get_option)( struct colm_program *prg, struct _stream_impl *si, int option ); \
@@ -165,7 +167,7 @@ struct run_buf
/* Must be at the end. We will grow this struct to add data if the input
* demands it. */
- char data[FSM_BUFSIZE];
+ alph_t data[FSM_BUFSIZE];
};
struct run_buf *new_run_buf( int sz );
@@ -180,7 +182,7 @@ struct stream_impl_data
struct run_buf *tail;
} queue;
- const char *data;
+ const alph_t *data;
long dlen;
int offset;
@@ -220,7 +222,7 @@ struct colm_stream *colm_stream_open_collect( struct colm_program *prg );
char *colm_filename_add( struct colm_program *prg, const char *fn );
struct stream_impl *colm_impl_new_accum( char *name );
struct stream_impl *colm_impl_consumed( char *name, int len );
-struct stream_impl *colm_impl_new_text( char *name, const char *data, int len );
+struct stream_impl *colm_impl_new_text( char *name, const alph_t *data, int len );
#ifdef __cplusplus
}
diff --git a/colm/keyops.h b/colm/keyops.h
index 094b09e2..ed58db8d 100644
--- a/colm/keyops.h
+++ b/colm/keyops.h
@@ -89,6 +89,7 @@ struct HostType
{
const char *data1;
const char *data2;
+ bool isSigned;
long long minVal;
long long maxVal;
unsigned int size;
@@ -98,8 +99,7 @@ struct HostLang
{
HostType *hostTypes;
int numHostTypes;
- HostType *defaultAlphType;
- bool explicitUnsigned;
+ int defaultHostType;
};
extern HostLang *hostLang;
@@ -113,9 +113,9 @@ struct KeyOps
KeyOps() : alphType(0) {}
Key minKey, maxKey;
- HostType *alphType;
+ const HostType *alphType;
- void setAlphType( HostType *alphType )
+ void setAlphType( const HostType *alphType )
{
this->alphType = alphType;
minKey = (long) alphType->minVal;
diff --git a/colm/main.cc b/colm/main.cc
index 501ae16b..e7cbd5e9 100644
--- a/colm/main.cc
+++ b/colm/main.cc
@@ -103,12 +103,21 @@ void version();
/* Total error count. */
int gblErrorCount = 0;
+/*
+ * Alphabet Type for the parsing machinery. The trees/strings of parsed data
+ * all use char type. Currently we can support signed char, unsigned char or
+ * char. If changing this, the colm_alph_t type needs to change as well.
+ * Currently, this is a compile time change only. A colm binary currently
+ * cannot be made to work with multiple alphabet types.
+ */
+
HostType hostTypesC[] =
{
- { "char", 0, CHAR_MIN, CHAR_MAX, sizeof(char) },
+ { "unsigned", "char", false, 0, UCHAR_MAX, sizeof(unsigned char) },
};
-HostLang hostLangC = { hostTypesC, 8, hostTypesC+0, true };
+
+HostLang hostLangC = { hostTypesC, 1, 0 };
HostLang *hostLang = &hostLangC;
/* Print the opening to an error in the input, then return the error ostream. */
diff --git a/colm/pdacodegen.cc b/colm/pdacodegen.cc
index 5e069998..d6435ea9 100644
--- a/colm/pdacodegen.cc
+++ b/colm/pdacodegen.cc
@@ -397,7 +397,7 @@ void PdaCodeGen::writeRuntimeData( colm_sections *runtimeData, struct pda_tables
out << "static const char *" << litdata() << "[] = {\n";
for ( int i = 0; i < runtimeData->num_literals; i++ ) {
out << "\t\"";
- escapeLiteralString( out, runtimeData->litdata[i] );
+ escapeLiteralString( out, runtimeData->litdata[i], runtimeData->litlen[i] );
out << "\",\n";
}
out << "};\n\n";
diff --git a/colm/pdarun.c b/colm/pdarun.c
index 59b8a947..b2c2ca66 100644
--- a/colm/pdarun.c
+++ b/colm/pdarun.c
@@ -112,7 +112,7 @@ head_t *colm_stream_pull( program_t *prg, tree_t **sp, struct pda_run *pda_run,
pda_run->consume_buf = run_buf;
}
- char *dest = run_buf->data + run_buf->length;
+ alph_t *dest = run_buf->data + run_buf->length;
is->funcs->get_data( prg, is, dest, length );
location_t *loc = location_allocate( prg );
@@ -123,14 +123,14 @@ head_t *colm_stream_pull( program_t *prg, tree_t **sp, struct pda_run *pda_run,
pda_run->p = pda_run->pe = 0;
pda_run->tokpref = 0;
- head_t *tokdata = colm_string_alloc_pointer( prg, dest, length );
+ head_t *tokdata = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length );
tokdata->location = loc;
return tokdata;
}
else {
head_t *head = init_str_space( length );
- char *dest = (char*)head->data;
+ alph_t *dest = (alph_t*)head->data;
is->funcs->get_data( prg, is, dest, length );
location_t *loc = location_allocate( prg );
@@ -143,7 +143,7 @@ head_t *colm_stream_pull( program_t *prg, tree_t **sp, struct pda_run *pda_run,
void colm_stream_push_text( struct colm_program *prg, struct input_impl *is, const char *data, long length )
{
- is->funcs->prepend_data( prg, is, data, length );
+ is->funcs->prepend_data( prg, is, colm_alph_from_cstr( data ), length );
}
void colm_stream_push_tree( struct colm_program *prg, struct input_impl *is, tree_t *tree, int ignore )
@@ -170,7 +170,7 @@ void colm_undo_stream_push( program_t *prg, tree_t **sp, struct input_impl *is,
/* Should only be sending back whole tokens/ignores, therefore the send back
* should never cross a buffer boundary. Either we slide back data, or we move to
* a previous buffer and slide back data. */
-static void send_back_text( struct colm_program *prg, struct input_impl *is, const char *data, long length )
+static void send_back_text( struct colm_program *prg, struct input_impl *is, const alph_t *data, long length )
{
//debug( REALM_PARSE, "push back of %ld characters\n", length );
@@ -209,7 +209,7 @@ static void send_back_ignore( program_t *prg, tree_t **sp,
if ( artificial )
send_back_tree( prg, is, parse_tree->shadow->tree );
else
- send_back_text( prg, is, string_data( head ), head->length );
+ send_back_text( prg, is, colm_alph_from_cstr( string_data( head ) ), head->length );
}
colm_decrement_steps( pda_run );
@@ -278,7 +278,7 @@ static void send_back( program_t *prg, tree_t **sp, struct pda_run *pda_run,
}
/* Push back the token data. */
- send_back_text( prg, is, string_data( parse_tree->shadow->tree->tokdata ),
+ send_back_text( prg, is, colm_alph_from_cstr( string_data( parse_tree->shadow->tree->tokdata ) ),
string_length( parse_tree->shadow->tree->tokdata ) );
/* If eof was just sent back remember that it needs to be sent again. */
@@ -377,7 +377,7 @@ kid_t *make_token_with_data( program_t *prg, struct pda_run *pda_run,
for ( i = 0; i < lel_info[id].num_capture_attr; i++ ) {
CaptureAttr *ca = &prg->rtd->capture_attr[lel_info[id].capture_attr + i];
head_t *data = string_alloc_full( prg,
- pda_run->mark[ca->mark_enter],
+ colm_cstr_from_alph( pda_run->mark[ca->mark_enter] ),
pda_run->mark[ca->mark_leave] -
pda_run->mark[ca->mark_enter] );
tree_t *string = construct_string( prg, data );
@@ -736,9 +736,9 @@ static head_t *extract_match( program_t *prg, tree_t **sp,
pda_run->consume_buf = run_buf;
}
- char *dest = run_buf->data + run_buf->length;
+ alph_t *dest = run_buf->data + run_buf->length;
- is->funcs->get_data( prg, is, dest, length );
+ is->funcs->get_data( prg, is, (alph_t*)dest, length );
location_t *location = location_allocate( prg );
is->funcs->consume_data( prg, is, length, location );
@@ -748,7 +748,7 @@ static head_t *extract_match( program_t *prg, tree_t **sp,
pda_run->tokpref = 0;
pda_run->tokstart = 0;
- head_t *head = colm_string_alloc_pointer( prg, dest, length );
+ head_t *head = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length );
head->location = location;
@@ -793,7 +793,7 @@ static head_t *extract_no_l( program_t *prg, tree_t **sp,
pda_run->consume_buf = run_buf;
}
- char *dest = run_buf->data + run_buf->length;
+ alph_t *dest = run_buf->data + run_buf->length;
is->funcs->get_data( prg, is, dest, length );
@@ -808,7 +808,7 @@ static head_t *extract_no_l( program_t *prg, tree_t **sp,
pda_run->tokpref = 0;
pda_run->tokstart = 0;
- head_t *head = colm_string_alloc_pointer( prg, dest, length );
+ head_t *head = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length );
/* Don't pass the location. */
head->location = 0;
@@ -849,14 +849,14 @@ static head_t *peek_match( program_t *prg, struct pda_run *pda_run, struct input
pda_run->consume_buf = run_buf;
}
- char *dest = run_buf->data + run_buf->length;
+ alph_t *dest = run_buf->data + run_buf->length;
is->funcs->get_data( prg, is, dest, length );
pda_run->p = pda_run->pe = 0;
pda_run->tokpref = 0;
- head_t *head = colm_string_alloc_pointer( prg, dest, length );
+ head_t *head = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length );
head->location = location_allocate( prg );
is->funcs->transfer_loc( prg, head->location, is );
@@ -1093,7 +1093,7 @@ static long scan_token( program_t *prg, struct pda_run *pda_run, struct input_im
return SCAN_UNDO;
while ( true ) {
- char *pd = 0;
+ alph_t *pd = 0;
int len = 0;
int tokpref = pda_run->tokpref;
int type = is->funcs->get_parse_block( prg, is, &tokpref, &pd, &len );
diff --git a/colm/pdarun.h b/colm/pdarun.h
index 3cee2124..27e075c9 100644
--- a/colm/pdarun.h
+++ b/colm/pdarun.h
@@ -268,18 +268,18 @@ struct pda_run
long region, pre_region;
long fsm_cs, next_cs, act;
- char *start;
- char *tokstart;
+ alph_t *start;
+ alph_t *tokstart;
long tokend;
long tokpref;
- char *p, *pe;
+ alph_t *p, *pe;
char scan_eof;
char return_result;
char skip_tokpref;
char eof_term_recvd;
- char *mark[MARK_SLOTS];
+ alph_t *mark[MARK_SLOTS];
long matched_token;
/*
diff --git a/colm/print.c b/colm/print.c
index 2b69e86a..363a7eea 100644
--- a/colm/print.c
+++ b/colm/print.c
@@ -59,7 +59,7 @@ static void xml_escape_data( struct colm_print_args *print_args, const char *dat
void init_str_collect( str_collect_t *collect )
{
- collect->data = (char*) malloc( BUFFER_INITIAL_SIZE );
+ collect->data = malloc( BUFFER_INITIAL_SIZE );
collect->allocated = BUFFER_INITIAL_SIZE;
collect->length = 0;
collect->indent.indent = 0;
@@ -76,7 +76,7 @@ void str_collect_append( str_collect_t *collect, const char *data, long len )
long new_len = collect->length + len;
if ( new_len > collect->allocated ) {
collect->allocated = new_len * 2;
- collect->data = (char*) realloc( collect->data, collect->allocated );
+ collect->data = realloc( collect->data, collect->allocated );
}
memcpy( collect->data + collect->length, data, len );
collect->length += len;
@@ -91,7 +91,7 @@ void str_collect_clear( str_collect_t *collect )
void print_str( struct colm_print_args *print_args, head_t *str )
{
- print_args->out( print_args, (char*)(str->data), str->length );
+ print_args->out( print_args, str->data, str->length );
}
void append_collect( struct colm_print_args *args, const char *data, int length )
@@ -128,7 +128,7 @@ restart:
}
}
else {
- char *nl;
+ const char *nl;
if ( args->indent->level != COLM_INDENT_OFF &&
(nl = memchr( data, '\n', length )) )
{
@@ -559,7 +559,7 @@ static void xml_term( program_t *prg, tree_t **sp,
else if ( kid->tree->id == LEL_ID_STR ) {
head_t *head = (head_t*) ((str_t*)kid->tree)->value;
- xml_escape_data( print_args, (char*)(head->data), head->length );
+ xml_escape_data( print_args, head->data, head->length );
}
else if ( 0 < kid->tree->id && kid->tree->id < prg->rtd->first_non_term_id &&
kid->tree->id != LEL_ID_IGNORE &&
@@ -720,7 +720,6 @@ static void postfix_close( program_t *prg, tree_t **sp,
sprintf( buf, " %d", children );
args->out( args, buf, strlen( buf ) );
-
args->out( args, "\n", 1 );
}
}
diff --git a/colm/stream.c b/colm/stream.c
index 0aa836dd..abe1b636 100644
--- a/colm/stream.c
+++ b/colm/stream.c
@@ -166,7 +166,7 @@ struct run_buf *new_run_buf( int sz )
}
/* Keep the position up to date after consuming text. */
-void update_position_data( struct stream_impl_data *is, const char *data, long length )
+void update_position_data( struct stream_impl_data *is, const alph_t *data, long length )
{
int i;
for ( i = 0; i < length; i++ ) {
@@ -184,7 +184,7 @@ void update_position_data( struct stream_impl_data *is, const char *data, long l
}
/* Keep the position up to date after sending back text. */
-void undo_position_data( struct stream_impl_data *is, const char *data, long length )
+void undo_position_data( struct stream_impl_data *is, const alph_t *data, long length )
{
/* FIXME: this needs to fetch the position information from the parsed
* token and restore based on that.. */
@@ -221,7 +221,7 @@ static void data_transfer_loc( struct colm_program *prg, location_t *loc,
*/
static int data_get_data( struct colm_program *prg, struct stream_impl_data *ss,
- char *dest, int length )
+ alph_t *dest, int length )
{
int copied = 0;
@@ -249,7 +249,7 @@ static int data_get_data( struct colm_program *prg, struct stream_impl_data *ss,
/* Anything available in the current buffer. */
if ( avail > 0 ) {
/* The source data from the current buffer. */
- char *src = &buf->data[buf->offset];
+ alph_t *src = &buf->data[buf->offset];
int slen = avail < length ? avail : length;
memcpy( dest+copied, src, slen ) ;
@@ -280,7 +280,7 @@ static struct stream_impl *data_split_consumed( program_t *prg, struct stream_im
}
int data_append_data( struct colm_program *prg, struct stream_impl_data *sid,
- const char *data, int length )
+ const alph_t *data, int length )
{
struct run_buf *tail = sid->queue.tail;
if ( tail == 0 || length > (FSM_BUFSIZE - tail->length) ) {
@@ -414,7 +414,7 @@ static void data_print_tree( struct colm_program *prg, tree_t **sp,
}
static int data_get_parse_block( struct colm_program *prg, struct stream_impl_data *ss,
- int *pskip, char **pdp, int *copied )
+ int *pskip, alph_t **pdp, int *copied )
{
int ret = 0;
*copied = 0;
@@ -448,7 +448,7 @@ static int data_get_parse_block( struct colm_program *prg, struct stream_impl_da
/* Anything available in the current buffer. */
if ( avail > 0 ) {
/* The source data from the current buffer. */
- char *src = &buf->data[buf->offset];
+ alph_t *src = &buf->data[buf->offset];
/* Need to skip? */
if ( *pskip > 0 && *pskip >= avail ) {
@@ -523,9 +523,9 @@ static int data_consume_data( struct colm_program *prg, struct stream_impl_data
}
static int data_undo_consume_data( struct colm_program *prg, struct stream_impl_data *sid,
- const char *data, int length )
+ const alph_t *data, int length )
{
- const char *end = data + length;
+ const alph_t *end = data + length;
int amount = length;
if ( amount > sid->consumed )
amount = sid->consumed;
@@ -571,7 +571,7 @@ static int data_undo_consume_data( struct colm_program *prg, struct stream_impl_
*/
static int file_get_data_source( struct colm_program *prg, struct stream_impl_data *si,
- char *dest, int length )
+ alph_t *dest, int length )
{
return fread( dest, 1, length, si->file );
}
@@ -581,7 +581,7 @@ static int file_get_data_source( struct colm_program *prg, struct stream_impl_da
*/
static int accum_get_data_source( struct colm_program *prg, struct stream_impl_data *si,
- char *dest, int want )
+ alph_t *dest, int want )
{
long avail = si->dlen - si->offset;
long take = avail < want ? avail : want;
@@ -710,14 +710,14 @@ struct stream_impl *colm_impl_consumed( char *name, int len )
return (struct stream_impl*)si;
}
-struct stream_impl *colm_impl_new_text( char *name, const char *data, int len )
+struct stream_impl *colm_impl_new_text( char *name, const alph_t *data, int len )
{
struct stream_impl_data *si = (struct stream_impl_data*)
malloc(sizeof(struct stream_impl_data));
si_data_init( si, name );
si->funcs = (struct stream_funcs*)&accum_funcs;
- char *buf = (char*)malloc( len );
+ alph_t *buf = (alph_t*)malloc( len );
memcpy( buf, data, len );
si->data = buf;
@@ -778,7 +778,7 @@ stream_t *colm_stream_open_file( program_t *prg, tree_t *name, tree_t *mode )
}
/* Need to make a C-string (null terminated). */
- char *file_name = (char*)malloc(string_length(head_name)+1);
+ char *file_name = malloc(string_length(head_name)+1);
memcpy( file_name, string_data(head_name), string_length(head_name) );
file_name[string_length(head_name)] = 0;
diff --git a/colm/string.c b/colm/string.c
index 8a852e8b..d1d16aa9 100644
--- a/colm/string.c
+++ b/colm/string.c
@@ -273,9 +273,9 @@ head_t *make_literal( program_t *prg, long offset )
head_t *string_sprintf( program_t *prg, str_t *format, long integer )
{
head_t *format_head = format->value;
- long written = snprintf( 0, 0, string_data(format_head), integer );
+ long written = snprintf( 0, 0, (char*)string_data(format_head), integer );
head_t *head = init_str_space( written+1 );
- written = snprintf( (char*)head->data, written+1, string_data(format_head), integer );
+ written = snprintf( (char*)head->data, written+1, (char*)string_data(format_head), integer );
head->length -= 1;
return head;
}