diff options
-rw-r--r-- | colm/bytecode.c | 31 | ||||
-rw-r--r-- | colm/colm.h | 4 | ||||
-rw-r--r-- | colm/compiler.cc | 8 | ||||
-rw-r--r-- | colm/compiler.h | 2 | ||||
-rw-r--r-- | colm/ctinput.cc | 27 | ||||
-rw-r--r-- | colm/input.c | 10 | ||||
-rw-r--r-- | colm/input.h | 30 | ||||
-rw-r--r-- | colm/keyops.h | 8 | ||||
-rw-r--r-- | colm/main.cc | 13 | ||||
-rw-r--r-- | colm/pdacodegen.cc | 2 | ||||
-rw-r--r-- | colm/pdarun.c | 32 | ||||
-rw-r--r-- | colm/pdarun.h | 8 | ||||
-rw-r--r-- | colm/print.c | 11 | ||||
-rw-r--r-- | colm/stream.c | 28 | ||||
-rw-r--r-- | colm/string.c | 4 |
15 files changed, 122 insertions, 96 deletions
diff --git a/colm/bytecode.c b/colm/bytecode.c index 155a6a25..af50f679 100644 --- a/colm/bytecode.c +++ b/colm/bytecode.c @@ -35,6 +35,7 @@ #include <colm/pool.h> #include <colm/debug.h> +#include <colm/colm.h> #define TRUE_VAL 1 #define FALSE_VAL 0 @@ -215,7 +216,7 @@ static word_t stream_append_text( program_t *prg, tree_t **sp, input_t *dest, tr colm_print_tree_collect( prg, sp, &collect, input, trim ); /* Load it into the input. */ - impl->funcs->append_data( prg, impl, collect.data, collect.length ); + impl->funcs->append_data( prg, impl, colm_alph_from_cstr( collect.data ), collect.length ); length = collect.length; str_collect_destroy( &collect ); } @@ -238,7 +239,7 @@ static word_t stream_append_tree( program_t *prg, tree_t **sp, input_t *dest, tr colm_print_tree_collect( prg, sp, &collect, to_append, false ); /* Load it into the to_append. */ - impl->funcs->append_data( prg, impl, collect.data, collect.length ); + impl->funcs->append_data( prg, impl, colm_alph_from_cstr( collect.data ), collect.length ); length = collect.length; str_collect_destroy( &collect ); } @@ -288,10 +289,11 @@ static tree_t *stream_pull_bc( program_t *prg, tree_t **sp, struct pda_run *pda_ } -static void undo_stream_pull( struct colm_program *prg, struct input_impl *is, const char *data, long length ) +static void undo_stream_pull( struct colm_program *prg, struct input_impl *is, + const char *data, long length ) { //debug( REALM_PARSE, "undoing stream pull\n" ); - is->funcs->undo_consume_data( prg, is, data, length ); + is->funcs->undo_consume_data( prg, is, colm_alph_from_cstr( data ), length ); } static void undo_pull( program_t *prg, input_t *input, tree_t *str ) @@ -407,12 +409,14 @@ static void downref_locals( program_t *prg, tree_t ***psp, } } + static tree_t *construct_arg0( program_t *prg, int argc, const char **argv, const int *argl ) { tree_t *arg0 = 0; if ( argc > 0 ) { - size_t len = argl != 0 ? 
argl[0] : strlen(argv[0]); - head_t *head = colm_string_alloc_pointer( prg, argv[0], len ); + const char *argv0 = argv[0]; + size_t len = argl != 0 ? argl[0] : strlen( argv[0] ); + head_t *head = colm_string_alloc_pointer( prg, argv0, len ); arg0 = construct_string( prg, head ); colm_tree_upref( prg, arg0 ); } @@ -425,7 +429,8 @@ static list_t *construct_argv( program_t *prg, int argc, const char **argv, cons int i; for ( i = 1; i < argc; i++ ) { size_t len = argl != 0 ? argl[i] : strlen(argv[i]); - head_t *head = colm_string_alloc_pointer( prg, argv[i], len ); + const char *argv_i = argv[i]; + head_t *head = colm_string_alloc_pointer( prg, argv_i, len ); tree_t *arg = construct_string( prg, head ); colm_tree_upref( prg, arg ); @@ -590,7 +595,9 @@ tree_t *colm_run_func( struct colm_program *prg, int frame_id, ((value_t*)execution.call_args)[p] = 0; } else { - head_t *head = colm_string_alloc_pointer( prg, params[p], strlen(params[p]) ); + const char *param_p = params[p]; + size_t param_len = strlen(params[p]); + head_t *head = colm_string_alloc_pointer( prg, param_p, param_len ); tree_t *tree = construct_string( prg, head ); colm_tree_upref( prg, tree ); ((tree_t**)execution.call_args)[p] = tree; @@ -966,13 +973,14 @@ again: * the local frame now. 
*/ struct lang_el_info *lel_info = prg->rtd->lel_info; struct pda_run *pda_run = exec->parser->pda_run; - char **mark = pda_run->mark; + alph_t **mark = pda_run->mark; int i, num_capture_attr = lel_info[pda_run->token_id].num_capture_attr; for ( i = 0; i < num_capture_attr; i++ ) { struct lang_el_info *lei = &lel_info[exec->parser->pda_run->token_id]; CaptureAttr *ca = &prg->rtd->capture_attr[lei->capture_attr + i]; - head_t *data = string_alloc_full( prg, mark[ca->mark_enter], + head_t *data = string_alloc_full( prg, + colm_cstr_from_alph( mark[ca->mark_enter] ), mark[ca->mark_leave] - mark[ca->mark_enter] ); tree_t *string = construct_string( prg, data ); colm_tree_upref( prg, string ); @@ -3186,7 +3194,8 @@ again: tree_t *str = 0; if ( tree->tokdata->location ) { const char *fn = tree->tokdata->location->name; - head_t *data = string_alloc_full( prg, fn, strlen(fn) ); + size_t fnlen = strlen( fn ); + head_t *data = string_alloc_full( prg, fn, fnlen ); str = construct_string( prg, data ); colm_tree_upref( prg, str ); } diff --git a/colm/colm.h b/colm/colm.h index 5091d27e..39506cab 100644 --- a/colm/colm.h +++ b/colm/colm.h @@ -45,6 +45,7 @@ struct indent_impl extern struct colm_sections colm_object; typedef unsigned long colm_value_t; +typedef unsigned char colm_alph_t; struct colm_tree { @@ -99,6 +100,9 @@ struct colm_tree *colm_get_left_repeat_next( struct colm_tree *tree ); struct colm_tree *colm_get_left_repeat_val( struct colm_tree *tree ); struct colm_location *colm_find_location( struct colm_program *prg, struct colm_tree *tree ); +static inline const colm_alph_t *colm_alph_from_cstr( const char *cstr ) { return (const colm_alph_t*)cstr; } +static inline const char *colm_cstr_from_alph( const colm_alph_t *alph ) { return (const char*)alph; } + /* Debug realms. To turn on, pass to colm_set_debug before invocation. 
*/ #define COLM_DBG_BYTECODE 0x00000001 #define COLM_DBG_PARSE 0x00000002 diff --git a/colm/compiler.cc b/colm/compiler.cc index 6b2b1032..72f87dac 100644 --- a/colm/compiler.cc +++ b/colm/compiler.cc @@ -86,12 +86,13 @@ Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd ) unsigned long ul = strtoul( str, 0, 16 ); + if ( errno == ERANGE || (unusedBits && ul >> (size * 8)) ) { error(loc) << "literal " << str << " overflows the alphabet type" << endl; ul = 1 << (size * 8); } - if ( unusedBits && ul >> (size * 8 - 1) ) + if ( keyOps->alphType->isSigned && unusedBits && ul >> (size * 8 - 1) ) ul |= (ULONG_MAX >> (size*8 ) ) << (size*8); return Key( (long)ul ); @@ -492,7 +493,8 @@ void Compiler::initGraphDict( ) void Compiler::initKeyOps( ) { /* Signedness and bounds. */ - HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType; + const HostType *alphType = alphTypeSet ? userAlphType : + &hostLang->hostTypes[hostLang->defaultHostType]; thisKeyOps.setAlphType( alphType ); if ( lowerNum != 0 ) { @@ -1022,7 +1024,7 @@ pda_run *Compiler::parsePattern( program_t *prg, tree_t **sp, const InputLoc &lo if ( pdaRun->parse_error_text != 0 ) { colm_data *tokdata = pdaRun->parse_error_text->tokdata; cerr << ": relative error: "; - cerr.write( tokdata->data, tokdata->length ); + cerr.write( (const char*)tokdata->data, tokdata->length ); } else { cerr << ": parse error"; diff --git a/colm/compiler.h b/colm/compiler.h index f22b33e3..67d5b40e 100644 --- a/colm/compiler.h +++ b/colm/compiler.h @@ -596,7 +596,7 @@ struct Compiler int nextPriorKey, nextNameId; /* Alphabet type. */ - HostType *userAlphType; + const HostType *userAlphType; bool alphTypeSet; /* Element type and get key expression. 
*/ diff --git a/colm/ctinput.cc b/colm/ctinput.cc index efc2c69d..f8267487 100644 --- a/colm/ctinput.cc +++ b/colm/ctinput.cc @@ -88,7 +88,7 @@ struct input_impl *colm_impl_new_pat( char *name, Pattern *pattern ) } int pat_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss, int *pskip, - char **pdp, int *copied ) + alph_t **pdp, int *copied ) { *copied = 0; @@ -107,7 +107,7 @@ int pat_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss, int if ( avail > 0 ) { /* The source data from the current buffer. */ - char *src = &buf->data[offset]; + alph_t *src = (alph_t*)&buf->data[offset]; int slen = avail; /* Need to skip? */ @@ -136,7 +136,7 @@ int pat_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss, int } -int pat_get_data( struct colm_program *prg, struct input_impl_ct *ss, char *dest, int length ) +int pat_get_data( struct colm_program *prg, struct input_impl_ct *ss, alph_t *dest, int length ) { int copied = 0; @@ -215,7 +215,7 @@ int pat_consume_data( struct colm_program *prg, struct input_impl_ct *ss, int le return consumed; } -int pat_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, const char *data, int length ) +int pat_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, const alph_t *data, int length ) { int origLen = length; while ( true ) { @@ -239,7 +239,7 @@ int pat_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, c } LangEl *pat_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss, long *bindId, - char **data, long *length ) + alph_t **data, long *length ) { LangEl *klangEl = ss->pat_item->prodEl->langEl; *bindId = ss->pat_item->bindId; @@ -310,7 +310,8 @@ struct input_impl *colm_impl_new_cons( char *name, Constructor *constructor ) return (struct input_impl*)ss; } -LangEl *repl_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss, long *bindId, char **data, long *length ) +LangEl *repl_consume_lang_el( struct 
colm_program *prg, struct input_impl_ct *ss, + long *bindId, alph_t **data, long *length ) { LangEl *klangEl = ss->cons_item->type == ConsItem::ExprType ? ss->cons_item->langEl : ss->cons_item->prodEl->langEl; @@ -326,7 +327,7 @@ LangEl *repl_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss ss->cons_item->prodEl->typeRef->pdaLiteral->data, ss->cons_item->prodEl->typeRef->pdaLiteral->loc ); - *data = ss->cons_item->data; + *data = (alph_t*)ss->cons_item->data.data; *length = ss->cons_item->data.length(); } } @@ -337,7 +338,7 @@ LangEl *repl_consume_lang_el( struct colm_program *prg, struct input_impl_ct *ss } int repl_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss, - int *pskip, char **pdp, int *copied ) + int *pskip, alph_t **pdp, int *copied ) { *copied = 0; @@ -356,7 +357,7 @@ int repl_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss, if ( avail > 0 ) { /* The source data from the current buffer. */ - char *src = &buf->data[offset]; + alph_t *src = (alph_t*)&buf->data[offset]; int slen = avail; /* Need to skip? */ @@ -384,7 +385,7 @@ int repl_get_parse_block( struct colm_program *prg, struct input_impl_ct *ss, return INPUT_DATA; } -int repl_get_data( struct colm_program *prg, struct input_impl_ct *ss, char *dest, int length ) +int repl_get_data( struct colm_program *prg, struct input_impl_ct *ss, alph_t *dest, int length ) { int copied = 0; @@ -468,7 +469,7 @@ int repl_consume_data( struct colm_program *prg, struct input_impl_ct *ss, int l return consumed; } -int repl_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, const char *data, int length ) +int repl_undo_consume_data( struct colm_program *prg, struct input_impl_ct *ss, const alph_t *data, int length ) { int origLen = length; while ( true ) { @@ -527,7 +528,7 @@ extern "C" void internalSendNamedLangEl( program_t *prg, tree_t **sp, { /* All three set by consumeLangEl. 
*/ long bindId; - char *data; + alph_t *data; long length; LangEl *klangEl = is->funcs->consume_lang_el( prg, is, &bindId, &data, &length ); @@ -537,7 +538,7 @@ extern "C" void internalSendNamedLangEl( program_t *prg, tree_t **sp, /* Copy the token data. */ head_t *tokdata = 0; if ( data != 0 ) - tokdata = string_alloc_full( prg, data, length ); + tokdata = string_alloc_full( prg, colm_cstr_from_alph( data ), length ); kid_t *input = make_token_with_data( prg, pdaRun, is, klangEl->id, tokdata ); diff --git a/colm/input.c b/colm/input.c index b8101c6f..4342249a 100644 --- a/colm/input.c +++ b/colm/input.c @@ -253,7 +253,7 @@ static void input_set_option( struct colm_program *prg, struct input_impl_seq *i static int input_get_parse_block( struct colm_program *prg, struct input_impl_seq *is, - int *pskip, char **pdp, int *copied ) + int *pskip, alph_t **pdp, int *copied ) { int ret = 0; *copied = 0; @@ -326,7 +326,7 @@ static int input_get_parse_block( struct colm_program *prg, struct input_impl_se } static int input_get_data( struct colm_program *prg, struct input_impl_seq *is, - char *dest, int length ) + alph_t *dest, int length ) { int copied = 0; @@ -414,7 +414,7 @@ static int input_consume_data( struct colm_program *prg, struct input_impl_seq * } static int input_undo_consume_data( struct colm_program *prg, struct input_impl_seq *si, - const char *data, int length ) + const alph_t *data, int length ) { /* When we push back data we need to move backwards through the block of * text. 
The source stream type will */ @@ -492,7 +492,7 @@ static void input_undo_consume_tree( struct colm_program *prg, struct input_impl * Prepend */ static void input_prepend_data( struct colm_program *prg, struct input_impl_seq *si, - const char *data, long length ) + const alph_t *data, long length ) { debug( prg, REALM_INPUT, "input_prepend_data: stream %p prepend data length %d\n", si, length ); @@ -577,7 +577,7 @@ static tree_t *input_undo_prepend_stream( struct colm_program *prg, struct input } static void input_append_data( struct colm_program *prg, struct input_impl_seq *si, - const char *data, long length ) + const alph_t *data, long length ) { debug( prg, REALM_INPUT, "input_append_data: stream %p append data length %d\n", si, length ); diff --git a/colm/input.h b/colm/input.h index f116561f..c0a896d5 100644 --- a/colm/input.h +++ b/colm/input.h @@ -54,24 +54,26 @@ struct colm_stream; struct input_impl; struct stream_impl; +typedef colm_alph_t alph_t; + #define DEF_INPUT_FUNCS( input_funcs, _input_impl ) \ struct input_funcs \ { \ - int (*get_parse_block)( struct colm_program *prg, struct _input_impl *si, int *pskip, char **pdp, int *copied ); \ - int (*get_data)( struct colm_program *prg, struct _input_impl *si, char *dest, int length ); \ + int (*get_parse_block)( struct colm_program *prg, struct _input_impl *si, int *pskip, alph_t **pdp, int *copied ); \ + int (*get_data)( struct colm_program *prg, struct _input_impl *si, alph_t *dest, int length ); \ int (*consume_data)( struct colm_program *prg, struct _input_impl *si, int length, struct colm_location *loc ); \ - int (*undo_consume_data)( struct colm_program *prg, struct _input_impl *si, const char *data, int length ); \ + int (*undo_consume_data)( struct colm_program *prg, struct _input_impl *si, const alph_t *data, int length ); \ struct colm_tree *(*consume_tree)( struct colm_program *prg, struct _input_impl *si ); \ void (*undo_consume_tree)( struct colm_program *prg, struct _input_impl *si, struct 
colm_tree *tree, int ignore ); \ - struct LangEl *(*consume_lang_el)( struct colm_program *prg, struct _input_impl *si, long *bind_id, char **data, long *length ); \ + struct LangEl *(*consume_lang_el)( struct colm_program *prg, struct _input_impl *si, long *bind_id, alph_t **data, long *length ); \ void (*undo_consume_lang_el)( struct colm_program *prg, struct _input_impl *si ); \ - void (*prepend_data)( struct colm_program *prg, struct _input_impl *si, const char *data, long len ); \ + void (*prepend_data)( struct colm_program *prg, struct _input_impl *si, const alph_t *data, long len ); \ int (*undo_prepend_data)( struct colm_program *prg, struct _input_impl *si, int length ); \ void (*prepend_tree)( struct colm_program *prg, struct _input_impl *si, struct colm_tree *tree, int ignore ); \ struct colm_tree *(*undo_prepend_tree)( struct colm_program *prg, struct _input_impl *si ); \ void (*prepend_stream)( struct colm_program *prg, struct _input_impl *si, struct colm_stream *stream ); \ struct colm_tree *(*undo_prepend_stream)( struct colm_program *prg, struct _input_impl *si ); \ - void (*append_data)( struct colm_program *prg, struct _input_impl *si, const char *data, long length ); \ + void (*append_data)( struct colm_program *prg, struct _input_impl *si, const alph_t *data, long length ); \ struct colm_tree *(*undo_append_data)( struct colm_program *prg, struct _input_impl *si, int length ); \ void (*append_tree)( struct colm_program *prg, struct _input_impl *si, struct colm_tree *tree ); \ struct colm_tree *(*undo_append_tree)( struct colm_program *prg, struct _input_impl *si ); \ @@ -87,11 +89,11 @@ struct input_funcs \ #define DEF_STREAM_FUNCS( stream_funcs, _stream_impl ) \ struct stream_funcs \ { \ - int (*get_parse_block)( struct colm_program *prg, struct _stream_impl *si, int *pskip, char **pdp, int *copied ); \ - int (*get_data)( struct colm_program *prg, struct _stream_impl *si, char *dest, int length ); \ - int (*get_data_source)( struct colm_program 
*prg, struct _stream_impl *si, char *dest, int length ); \ + int (*get_parse_block)( struct colm_program *prg, struct _stream_impl *si, int *pskip, alph_t **pdp, int *copied ); \ + int (*get_data)( struct colm_program *prg, struct _stream_impl *si, alph_t *dest, int length ); \ + int (*get_data_source)( struct colm_program *prg, struct _stream_impl *si, alph_t *dest, int length ); \ int (*consume_data)( struct colm_program *prg, struct _stream_impl *si, int length, struct colm_location *loc ); \ - int (*undo_consume_data)( struct colm_program *prg, struct _stream_impl *si, const char *data, int length ); \ + int (*undo_consume_data)( struct colm_program *prg, struct _stream_impl *si, const alph_t *data, int length ); \ void (*transfer_loc)( struct colm_program *prg, struct colm_location *loc, struct _stream_impl *si ); \ struct colm_str_collect *(*get_collect)( struct colm_program *prg, struct _stream_impl *si ); \ void (*flush_stream)( struct colm_program *prg, struct _stream_impl *si ); \ @@ -99,7 +101,7 @@ struct stream_funcs \ void (*print_tree)( struct colm_program *prg, struct colm_tree **sp, \ struct _stream_impl *impl, struct colm_tree *tree, int trim ); \ struct stream_impl *(*split_consumed)( struct colm_program *prg, struct _stream_impl *si ); \ - int (*append_data)( struct colm_program *prg, struct _stream_impl *si, const char *data, int len ); \ + int (*append_data)( struct colm_program *prg, struct _stream_impl *si, const alph_t *data, int len ); \ int (*undo_append_data)( struct colm_program *prg, struct _stream_impl *si, int length ); \ void (*destructor)( struct colm_program *prg, struct colm_tree **sp, struct _stream_impl *si ); \ int (*get_option)( struct colm_program *prg, struct _stream_impl *si, int option ); \ @@ -165,7 +167,7 @@ struct run_buf /* Must be at the end. We will grow this struct to add data if the input * demands it. 
 */ - char data[FSM_BUFSIZE]; + alph_t data[FSM_BUFSIZE]; }; struct run_buf *new_run_buf( int sz ); @@ -180,7 +182,7 @@ struct stream_impl_data struct run_buf *tail; } queue; - const char *data; + const alph_t *data; long dlen; int offset; @@ -220,7 +222,7 @@ struct colm_stream *colm_stream_open_collect( struct colm_program *prg ); char *colm_filename_add( struct colm_program *prg, const char *fn ); struct stream_impl *colm_impl_new_accum( char *name ); struct stream_impl *colm_impl_consumed( char *name, int len ); -struct stream_impl *colm_impl_new_text( char *name, const char *data, int len ); +struct stream_impl *colm_impl_new_text( char *name, const alph_t *data, int len ); #ifdef __cplusplus } diff --git a/colm/keyops.h b/colm/keyops.h index 094b09e2..ed58db8d 100644 --- a/colm/keyops.h +++ b/colm/keyops.h @@ -89,6 +89,7 @@ struct HostType { const char *data1; const char *data2; + bool isSigned; long long minVal; long long maxVal; unsigned int size; @@ -98,8 +99,7 @@ struct HostLang { HostType *hostTypes; int numHostTypes; - HostType *defaultAlphType; - bool explicitUnsigned; + int defaultHostType; }; extern HostLang *hostLang; @@ -113,9 +113,9 @@ struct KeyOps KeyOps() : alphType(0) {} Key minKey, maxKey; - HostType *alphType; + const HostType *alphType; - void setAlphType( HostType *alphType ) + void setAlphType( const HostType *alphType ) { this->alphType = alphType; minKey = (long) alphType->minVal; diff --git a/colm/main.cc b/colm/main.cc index 501ae16b..e7cbd5e9 100644 --- a/colm/main.cc +++ b/colm/main.cc @@ -103,12 +103,21 @@ void version(); /* Total error count. */ int gblErrorCount = 0; +/* + * Alphabet Type for the parsing machinery. The trees/strings of parsed data + * all use char type. Currently we can support signed char, unsigned char or + * char. If changing this, the colm_alph_t type needs to change as well. + * Currently, this is a compile time change only. A colm binary currently + * cannot be made to work with multiple alphabet types. 
+ */ + HostType hostTypesC[] = { - { "char", 0, CHAR_MIN, CHAR_MAX, sizeof(char) }, + { "unsigned", "char", false, 0, UCHAR_MAX, sizeof(unsigned char) }, }; -HostLang hostLangC = { hostTypesC, 8, hostTypesC+0, true }; + +HostLang hostLangC = { hostTypesC, 1, 0 }; HostLang *hostLang = &hostLangC; /* Print the opening to an error in the input, then return the error ostream. */ diff --git a/colm/pdacodegen.cc b/colm/pdacodegen.cc index 5e069998..d6435ea9 100644 --- a/colm/pdacodegen.cc +++ b/colm/pdacodegen.cc @@ -397,7 +397,7 @@ void PdaCodeGen::writeRuntimeData( colm_sections *runtimeData, struct pda_tables out << "static const char *" << litdata() << "[] = {\n"; for ( int i = 0; i < runtimeData->num_literals; i++ ) { out << "\t\""; - escapeLiteralString( out, runtimeData->litdata[i] ); + escapeLiteralString( out, runtimeData->litdata[i], runtimeData->litlen[i] ); out << "\",\n"; } out << "};\n\n"; diff --git a/colm/pdarun.c b/colm/pdarun.c index 59b8a947..b2c2ca66 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -112,7 +112,7 @@ head_t *colm_stream_pull( program_t *prg, tree_t **sp, struct pda_run *pda_run, pda_run->consume_buf = run_buf; } - char *dest = run_buf->data + run_buf->length; + alph_t *dest = run_buf->data + run_buf->length; is->funcs->get_data( prg, is, dest, length ); location_t *loc = location_allocate( prg ); @@ -123,14 +123,14 @@ head_t *colm_stream_pull( program_t *prg, tree_t **sp, struct pda_run *pda_run, pda_run->p = pda_run->pe = 0; pda_run->tokpref = 0; - head_t *tokdata = colm_string_alloc_pointer( prg, dest, length ); + head_t *tokdata = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length ); tokdata->location = loc; return tokdata; } else { head_t *head = init_str_space( length ); - char *dest = (char*)head->data; + alph_t *dest = (alph_t*)head->data; is->funcs->get_data( prg, is, dest, length ); location_t *loc = location_allocate( prg ); @@ -143,7 +143,7 @@ head_t *colm_stream_pull( program_t *prg, tree_t **sp, struct 
pda_run *pda_run, void colm_stream_push_text( struct colm_program *prg, struct input_impl *is, const char *data, long length ) { - is->funcs->prepend_data( prg, is, data, length ); + is->funcs->prepend_data( prg, is, colm_alph_from_cstr( data ), length ); } void colm_stream_push_tree( struct colm_program *prg, struct input_impl *is, tree_t *tree, int ignore ) @@ -170,7 +170,7 @@ void colm_undo_stream_push( program_t *prg, tree_t **sp, struct input_impl *is, /* Should only be sending back whole tokens/ignores, therefore the send back * should never cross a buffer boundary. Either we slide back data, or we move to * a previous buffer and slide back data. */ -static void send_back_text( struct colm_program *prg, struct input_impl *is, const char *data, long length ) +static void send_back_text( struct colm_program *prg, struct input_impl *is, const alph_t *data, long length ) { //debug( REALM_PARSE, "push back of %ld characters\n", length ); @@ -209,7 +209,7 @@ static void send_back_ignore( program_t *prg, tree_t **sp, if ( artificial ) send_back_tree( prg, is, parse_tree->shadow->tree ); else - send_back_text( prg, is, string_data( head ), head->length ); + send_back_text( prg, is, colm_alph_from_cstr( string_data( head ) ), head->length ); } colm_decrement_steps( pda_run ); @@ -278,7 +278,7 @@ static void send_back( program_t *prg, tree_t **sp, struct pda_run *pda_run, } /* Push back the token data. */ - send_back_text( prg, is, string_data( parse_tree->shadow->tree->tokdata ), + send_back_text( prg, is, colm_alph_from_cstr( string_data( parse_tree->shadow->tree->tokdata ) ), string_length( parse_tree->shadow->tree->tokdata ) ); /* If eof was just sent back remember that it needs to be sent again. 
*/ @@ -377,7 +377,7 @@ kid_t *make_token_with_data( program_t *prg, struct pda_run *pda_run, for ( i = 0; i < lel_info[id].num_capture_attr; i++ ) { CaptureAttr *ca = &prg->rtd->capture_attr[lel_info[id].capture_attr + i]; head_t *data = string_alloc_full( prg, - pda_run->mark[ca->mark_enter], + colm_cstr_from_alph( pda_run->mark[ca->mark_enter] ), pda_run->mark[ca->mark_leave] - pda_run->mark[ca->mark_enter] ); tree_t *string = construct_string( prg, data ); @@ -736,9 +736,9 @@ static head_t *extract_match( program_t *prg, tree_t **sp, pda_run->consume_buf = run_buf; } - char *dest = run_buf->data + run_buf->length; + alph_t *dest = run_buf->data + run_buf->length; - is->funcs->get_data( prg, is, dest, length ); + is->funcs->get_data( prg, is, (alph_t*)dest, length ); location_t *location = location_allocate( prg ); is->funcs->consume_data( prg, is, length, location ); @@ -748,7 +748,7 @@ static head_t *extract_match( program_t *prg, tree_t **sp, pda_run->tokpref = 0; pda_run->tokstart = 0; - head_t *head = colm_string_alloc_pointer( prg, dest, length ); + head_t *head = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length ); head->location = location; @@ -793,7 +793,7 @@ static head_t *extract_no_l( program_t *prg, tree_t **sp, pda_run->consume_buf = run_buf; } - char *dest = run_buf->data + run_buf->length; + alph_t *dest = run_buf->data + run_buf->length; is->funcs->get_data( prg, is, dest, length ); @@ -808,7 +808,7 @@ static head_t *extract_no_l( program_t *prg, tree_t **sp, pda_run->tokpref = 0; pda_run->tokstart = 0; - head_t *head = colm_string_alloc_pointer( prg, dest, length ); + head_t *head = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length ); /* Don't pass the location. 
*/ head->location = 0; @@ -849,14 +849,14 @@ static head_t *peek_match( program_t *prg, struct pda_run *pda_run, struct input pda_run->consume_buf = run_buf; } - char *dest = run_buf->data + run_buf->length; + alph_t *dest = run_buf->data + run_buf->length; is->funcs->get_data( prg, is, dest, length ); pda_run->p = pda_run->pe = 0; pda_run->tokpref = 0; - head_t *head = colm_string_alloc_pointer( prg, dest, length ); + head_t *head = colm_string_alloc_pointer( prg, colm_cstr_from_alph( dest ), length ); head->location = location_allocate( prg ); is->funcs->transfer_loc( prg, head->location, is ); @@ -1093,7 +1093,7 @@ static long scan_token( program_t *prg, struct pda_run *pda_run, struct input_im return SCAN_UNDO; while ( true ) { - char *pd = 0; + alph_t *pd = 0; int len = 0; int tokpref = pda_run->tokpref; int type = is->funcs->get_parse_block( prg, is, &tokpref, &pd, &len ); diff --git a/colm/pdarun.h b/colm/pdarun.h index 3cee2124..27e075c9 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -268,18 +268,18 @@ struct pda_run long region, pre_region; long fsm_cs, next_cs, act; - char *start; - char *tokstart; + alph_t *start; + alph_t *tokstart; long tokend; long tokpref; - char *p, *pe; + alph_t *p, *pe; char scan_eof; char return_result; char skip_tokpref; char eof_term_recvd; - char *mark[MARK_SLOTS]; + alph_t *mark[MARK_SLOTS]; long matched_token; /* diff --git a/colm/print.c b/colm/print.c index 2b69e86a..363a7eea 100644 --- a/colm/print.c +++ b/colm/print.c @@ -59,7 +59,7 @@ static void xml_escape_data( struct colm_print_args *print_args, const char *dat void init_str_collect( str_collect_t *collect ) { - collect->data = (char*) malloc( BUFFER_INITIAL_SIZE ); + collect->data = malloc( BUFFER_INITIAL_SIZE ); collect->allocated = BUFFER_INITIAL_SIZE; collect->length = 0; collect->indent.indent = 0; @@ -76,7 +76,7 @@ void str_collect_append( str_collect_t *collect, const char *data, long len ) long new_len = collect->length + len; if ( new_len > 
collect->allocated ) { collect->allocated = new_len * 2; - collect->data = (char*) realloc( collect->data, collect->allocated ); + collect->data = realloc( collect->data, collect->allocated ); } memcpy( collect->data + collect->length, data, len ); collect->length += len; @@ -91,7 +91,7 @@ void str_collect_clear( str_collect_t *collect ) void print_str( struct colm_print_args *print_args, head_t *str ) { - print_args->out( print_args, (char*)(str->data), str->length ); + print_args->out( print_args, str->data, str->length ); } void append_collect( struct colm_print_args *args, const char *data, int length ) @@ -128,7 +128,7 @@ restart: } } else { - char *nl; + const char *nl; if ( args->indent->level != COLM_INDENT_OFF && (nl = memchr( data, '\n', length )) ) { @@ -559,7 +559,7 @@ static void xml_term( program_t *prg, tree_t **sp, else if ( kid->tree->id == LEL_ID_STR ) { head_t *head = (head_t*) ((str_t*)kid->tree)->value; - xml_escape_data( print_args, (char*)(head->data), head->length ); + xml_escape_data( print_args, head->data, head->length ); } else if ( 0 < kid->tree->id && kid->tree->id < prg->rtd->first_non_term_id && kid->tree->id != LEL_ID_IGNORE && @@ -720,7 +720,6 @@ static void postfix_close( program_t *prg, tree_t **sp, sprintf( buf, " %d", children ); args->out( args, buf, strlen( buf ) ); - args->out( args, "\n", 1 ); } } diff --git a/colm/stream.c b/colm/stream.c index 0aa836dd..abe1b636 100644 --- a/colm/stream.c +++ b/colm/stream.c @@ -166,7 +166,7 @@ struct run_buf *new_run_buf( int sz ) } /* Keep the position up to date after consuming text. */ -void update_position_data( struct stream_impl_data *is, const char *data, long length ) +void update_position_data( struct stream_impl_data *is, const alph_t *data, long length ) { int i; for ( i = 0; i < length; i++ ) { @@ -184,7 +184,7 @@ void update_position_data( struct stream_impl_data *is, const char *data, long l } /* Keep the position up to date after sending back text. 
*/ -void undo_position_data( struct stream_impl_data *is, const char *data, long length ) +void undo_position_data( struct stream_impl_data *is, const alph_t *data, long length ) { /* FIXME: this needs to fetch the position information from the parsed * token and restore based on that.. */ @@ -221,7 +221,7 @@ static void data_transfer_loc( struct colm_program *prg, location_t *loc, */ static int data_get_data( struct colm_program *prg, struct stream_impl_data *ss, - char *dest, int length ) + alph_t *dest, int length ) { int copied = 0; @@ -249,7 +249,7 @@ static int data_get_data( struct colm_program *prg, struct stream_impl_data *ss, /* Anything available in the current buffer. */ if ( avail > 0 ) { /* The source data from the current buffer. */ - char *src = &buf->data[buf->offset]; + alph_t *src = &buf->data[buf->offset]; int slen = avail < length ? avail : length; memcpy( dest+copied, src, slen ) ; @@ -280,7 +280,7 @@ static struct stream_impl *data_split_consumed( program_t *prg, struct stream_im } int data_append_data( struct colm_program *prg, struct stream_impl_data *sid, - const char *data, int length ) + const alph_t *data, int length ) { struct run_buf *tail = sid->queue.tail; if ( tail == 0 || length > (FSM_BUFSIZE - tail->length) ) { @@ -414,7 +414,7 @@ static void data_print_tree( struct colm_program *prg, tree_t **sp, } static int data_get_parse_block( struct colm_program *prg, struct stream_impl_data *ss, - int *pskip, char **pdp, int *copied ) + int *pskip, alph_t **pdp, int *copied ) { int ret = 0; *copied = 0; @@ -448,7 +448,7 @@ static int data_get_parse_block( struct colm_program *prg, struct stream_impl_da /* Anything available in the current buffer. */ if ( avail > 0 ) { /* The source data from the current buffer. */ - char *src = &buf->data[buf->offset]; + alph_t *src = &buf->data[buf->offset]; /* Need to skip? 
*/ if ( *pskip > 0 && *pskip >= avail ) { @@ -523,9 +523,9 @@ static int data_consume_data( struct colm_program *prg, struct stream_impl_data } static int data_undo_consume_data( struct colm_program *prg, struct stream_impl_data *sid, - const char *data, int length ) + const alph_t *data, int length ) { - const char *end = data + length; + const alph_t *end = data + length; int amount = length; if ( amount > sid->consumed ) amount = sid->consumed; @@ -571,7 +571,7 @@ static int data_undo_consume_data( struct colm_program *prg, struct stream_impl_ */ static int file_get_data_source( struct colm_program *prg, struct stream_impl_data *si, - char *dest, int length ) + alph_t *dest, int length ) { return fread( dest, 1, length, si->file ); } @@ -581,7 +581,7 @@ static int file_get_data_source( struct colm_program *prg, struct stream_impl_da */ static int accum_get_data_source( struct colm_program *prg, struct stream_impl_data *si, - char *dest, int want ) + alph_t *dest, int want ) { long avail = si->dlen - si->offset; long take = avail < want ? avail : want; @@ -710,14 +710,14 @@ struct stream_impl *colm_impl_consumed( char *name, int len ) return (struct stream_impl*)si; } -struct stream_impl *colm_impl_new_text( char *name, const char *data, int len ) +struct stream_impl *colm_impl_new_text( char *name, const alph_t *data, int len ) { struct stream_impl_data *si = (struct stream_impl_data*) malloc(sizeof(struct stream_impl_data)); si_data_init( si, name ); si->funcs = (struct stream_funcs*)&accum_funcs; - char *buf = (char*)malloc( len ); + alph_t *buf = (alph_t*)malloc( len ); memcpy( buf, data, len ); si->data = buf; @@ -778,7 +778,7 @@ stream_t *colm_stream_open_file( program_t *prg, tree_t *name, tree_t *mode ) } /* Need to make a C-string (null terminated). 
*/ - char *file_name = (char*)malloc(string_length(head_name)+1); + char *file_name = malloc(string_length(head_name)+1); memcpy( file_name, string_data(head_name), string_length(head_name) ); file_name[string_length(head_name)] = 0; diff --git a/colm/string.c b/colm/string.c index 8a852e8b..d1d16aa9 100644 --- a/colm/string.c +++ b/colm/string.c @@ -273,9 +273,9 @@ head_t *make_literal( program_t *prg, long offset ) head_t *string_sprintf( program_t *prg, str_t *format, long integer ) { head_t *format_head = format->value; - long written = snprintf( 0, 0, string_data(format_head), integer ); + long written = snprintf( 0, 0, (char*)string_data(format_head), integer ); head_t *head = init_str_space( written+1 ); - written = snprintf( (char*)head->data, written+1, string_data(format_head), integer ); + written = snprintf( (char*)head->data, written+1, (char*)string_data(format_head), integer ); head->length -= 1; return head; } |