diff options
Diffstat (limited to 'cord/cordxtra.c')
-rw-r--r-- | cord/cordxtra.c | 243 |
1 files changed, 123 insertions, 120 deletions
diff --git a/cord/cordxtra.c b/cord/cordxtra.c index b0a74622..d5394dba 100644 --- a/cord/cordxtra.c +++ b/cord/cordxtra.c @@ -24,17 +24,17 @@ # include <stdarg.h> # include "cord.h" # include "ec.h" -# define I_HIDE_POINTERS /* So we get access to allocation lock. */ - /* We use this for lazy file reading, */ - /* so that we remain independent */ - /* of the threads primitives. */ +# define I_HIDE_POINTERS /* So we get access to allocation lock. */ + /* We use this for lazy file reading, */ + /* so that we remain independent */ + /* of the threads primitives. */ # include "gc.h" -/* For now we assume that pointer reads and writes are atomic, */ -/* i.e. another thread always sees the state before or after */ -/* a write. This might be false on a Motorola M68K with */ -/* pointers that are not 32-bit aligned. But there probably */ -/* aren't too many threads packages running on those. */ +/* For now we assume that pointer reads and writes are atomic, */ +/* i.e. another thread always sees the state before or after */ +/* a write. This might be false on a Motorola M68K with */ +/* pointers that are not 32-bit aligned. But there probably */ +/* aren't too many threads packages running on those. */ # define ATOMIC_WRITE(x,y) (x) = (y) # define ATOMIC_READ(x) (*(x)) @@ -46,19 +46,19 @@ # define SEEK_END 2 # endif -# define BUFSZ 2048 /* Size of stack allocated buffers when */ - /* we want large buffers. */ +# define BUFSZ 2048 /* Size of stack allocated buffers when */ + /* we want large buffers. */ typedef void (* oom_fn)(void); # define OUT_OF_MEMORY { if (CORD_oom_fn != (oom_fn) 0) (*CORD_oom_fn)(); \ - ABORT("Out of memory\n"); } + ABORT("Out of memory\n"); } # define ABORT(msg) { fprintf(stderr, "%s\n", msg); abort(); } CORD CORD_cat_char(CORD x, char c) { register char * string; - + if (c == '\0') return(CORD_cat(x, CORD_nul(1))); string = GC_MALLOC_ATOMIC(2); if (string == 0) OUT_OF_MEMORY; @@ -83,22 +83,22 @@ CORD CORD_catn(int nargs, ...) } typedef struct { - size_t len; - size_t count; - char * buf; + size_t len; + size_t count; + char * buf; } CORD_fill_data; int CORD_fill_proc(char c, void * client_data) { register CORD_fill_data * d = (CORD_fill_data *)client_data; register size_t count = d -> count; - + (d -> buf)[count] = c; d -> count = ++count; if (count >= d -> len) { - return(1); + return(1); } else { - return(0); + return(0); } } @@ -109,7 +109,7 @@ int CORD_batched_fill_proc(const char * s, void * client_data) register size_t max = d -> len; register char * buf = d -> buf; register const char * t = s; - + while((buf[count] = *t++) != '\0') { count++; if (count >= max) { @@ -121,12 +121,12 @@ int CORD_batched_fill_proc(const char * s, void * client_data) return(0); } -/* Fill buf with len characters starting at i. */ -/* Assumes len characters are available. */ +/* Fill buf with len characters starting at i. */ +/* Assumes len characters are available. */ void CORD_fill_buf(CORD x, size_t i, size_t len, char * buf) { CORD_fill_data fd; - + fd.len = len; fd.buf = buf; fd.count = 0; @@ -138,7 +138,7 @@ int CORD_cmp(CORD x, CORD y) CORD_pos xpos; CORD_pos ypos; register size_t avail, yavail; - + if (y == CORD_EMPTY) return(x != CORD_EMPTY); if (x == CORD_EMPTY) return(-1); if (CORD_IS_STRING(y) && CORD_IS_STRING(x)) return(strcmp(x,y)); @@ -147,7 +147,7 @@ int CORD_cmp(CORD x, CORD y) for(;;) { if (!CORD_pos_valid(xpos)) { if (CORD_pos_valid(ypos)) { - return(-1); + return(-1); } else { return(0); } @@ -163,12 +163,12 @@ int CORD_cmp(CORD x, CORD y) CORD_next(xpos); CORD_next(ypos); } else { - /* process as many characters as we can */ + /* process as many characters as we can */ register int result; - + if (avail > yavail) avail = yavail; result = strncmp(CORD_pos_cur_char_addr(xpos), - CORD_pos_cur_char_addr(ypos), avail); + CORD_pos_cur_char_addr(ypos), avail); if (result != 0) return(result); CORD_pos_advance(xpos, avail); CORD_pos_advance(ypos, avail); @@ -182,13 +182,13 @@ int CORD_ncmp(CORD x, size_t x_start, CORD y, size_t y_start, size_t len) CORD_pos ypos; register size_t count; register long avail, yavail; - + CORD_set_pos(xpos, x, x_start); CORD_set_pos(ypos, y, y_start); for(count = 0; count < len;) { if (!CORD_pos_valid(xpos)) { if (CORD_pos_valid(ypos)) { - return(-1); + return(-1); } else { return(0); } @@ -205,14 +205,14 @@ int CORD_ncmp(CORD x, size_t x_start, CORD y, size_t y_start, size_t len) CORD_next(ypos); count++; } else { - /* process as many characters as we can */ + /* process as many characters as we can */ register int result; - + if (avail > yavail) avail = yavail; count += avail; if (count > len) avail -= (count - len); result = strncmp(CORD_pos_cur_char_addr(xpos), - CORD_pos_cur_char_addr(ypos), (size_t)avail); + CORD_pos_cur_char_addr(ypos), (size_t)avail); if (result != 0) return(result); CORD_pos_advance(xpos, (size_t)avail); CORD_pos_advance(ypos, (size_t)avail); @@ -225,7 +225,7 @@ char * CORD_to_char_star(CORD x) { register size_t len = CORD_len(x); char * result = GC_MALLOC_ATOMIC(len + 1); - + if (result == 0) OUT_OF_MEMORY; CORD_fill_buf(x, 0, len, result); result[len] = '\0'; @@ -254,7 +254,7 @@ const char * CORD_to_const_char_star(CORD x) char CORD_fetch(CORD x, size_t i) { CORD_pos xpos; - + CORD_set_pos(xpos, x, i); if (!CORD_pos_valid(xpos)) ABORT("bad index?"); return(CORD_pos_fetch(xpos)); @@ -264,36 +264,36 @@ char CORD_fetch(CORD x, size_t i) int CORD_put_proc(char c, void * client_data) { register FILE * f = (FILE *)client_data; - + return(putc(c, f) == EOF); } int CORD_batched_put_proc(const char * s, void * client_data) { register FILE * f = (FILE *)client_data; - + return(fputs(s, f) == EOF); } - + int CORD_put(CORD x, FILE * f) { if (CORD_iter5(x, 0, CORD_put_proc, CORD_batched_put_proc, f)) { return(EOF); } else { - return(1); + return(1); } } typedef struct { - size_t pos; /* Current position in the cord */ - char target; /* Character we're looking for */ + size_t pos; /* Current position in the cord */ + char target; /* Character we're looking for */ } chr_data; int CORD_chr_proc(char c, void * client_data) { register chr_data * d = (chr_data *)client_data; - + if (c == d -> target) return(1); (d -> pos) ++; return(0); @@ -302,7 +302,7 @@ int CORD_chr_proc(char c, void * client_data) int CORD_rchr_proc(char c, void * client_data) { register chr_data * d = (chr_data *)client_data; - + if (c == d -> target) return(1); (d -> pos) --; return(0); @@ -312,48 +312,48 @@ int CORD_batched_chr_proc(const char *s, void * client_data) { register chr_data * d = (chr_data *)client_data; register char * occ = strchr(s, d -> target); - + if (occ == 0) { - d -> pos += strlen(s); - return(0); + d -> pos += strlen(s); + return(0); } else { - d -> pos += occ - s; - return(1); + d -> pos += occ - s; + return(1); } } size_t CORD_chr(CORD x, size_t i, int c) { chr_data d; - + d.pos = i; d.target = c; if (CORD_iter5(x, i, CORD_chr_proc, CORD_batched_chr_proc, &d)) { return(d.pos); } else { - return(CORD_NOT_FOUND); + return(CORD_NOT_FOUND); } } size_t CORD_rchr(CORD x, size_t i, int c) { chr_data d; - + d.pos = i; d.target = c; if (CORD_riter4(x, i, CORD_rchr_proc, &d)) { return(d.pos); } else { - return(CORD_NOT_FOUND); + return(CORD_NOT_FOUND); } } -/* Find the first occurrence of s in x at position start or later. */ -/* This uses an asymptotically poor algorithm, which should typically */ -/* perform acceptably. We compare the first few characters directly, */ -/* and call CORD_ncmp whenever there is a partial match. */ -/* This has the advantage that we allocate very little, or not at all. */ -/* It's very fast if there are few close misses. */ +/* Find the first occurrence of s in x at position start or later. */ +/* This uses an asymptotically poor algorithm, which should typically */ +/* perform acceptably. We compare the first few characters directly, */ +/* and call CORD_ncmp whenever there is a partial match. */ +/* This has the advantage that we allocate very little, or not at all. */ +/* It's very fast if there are few close misses. */ size_t CORD_str(CORD x, size_t start, CORD s) { CORD_pos xpos; @@ -361,14 +361,14 @@ size_t CORD_str(CORD x, size_t start, CORD s) size_t slen; register size_t start_len; const char * s_start; - unsigned long s_buf = 0; /* The first few characters of s */ - unsigned long x_buf = 0; /* Start of candidate substring. */ - /* Initialized only to make compilers */ - /* happy. */ + unsigned long s_buf = 0; /* The first few characters of s */ + unsigned long x_buf = 0; /* Start of candidate substring. */ + /* Initialized only to make compilers */ + /* happy. */ unsigned long mask = 0; register size_t i; register size_t match_pos; - + if (s == CORD_EMPTY) return(start); if (CORD_IS_STRING(s)) { s_start = s; @@ -391,17 +391,17 @@ size_t CORD_str(CORD x, size_t start, CORD s) CORD_next(xpos); } for (match_pos = start; ; match_pos++) { - if ((x_buf & mask) == s_buf) { - if (slen == start_len || - CORD_ncmp(x, match_pos + start_len, - s, start_len, slen - start_len) == 0) { - return(match_pos); - } - } - if ( match_pos == xlen - slen ) { - return(CORD_NOT_FOUND); - } - x_buf <<= 8; + if ((x_buf & mask) == s_buf) { + if (slen == start_len || + CORD_ncmp(x, match_pos + start_len, + s, start_len, slen - start_len) == 0) { + return(match_pos); + } + } + if ( match_pos == xlen - slen ) { + return(CORD_NOT_FOUND); + } + x_buf <<= 8; x_buf |= (unsigned char)CORD_pos_fetch(xpos); CORD_next(xpos); } @@ -442,16 +442,16 @@ CORD CORD_from_file_eager(FILE * f) { register int c; CORD_ec ecord; - + CORD_ec_init(ecord); for(;;) { c = getc(f); if (c == 0) { - /* Append the right number of NULs */ - /* Note that any string of NULs is rpresented in 4 words, */ - /* independent of its length. */ + /* Append the right number of NULs */ + /* Note that any string of NULs is rpresented in 4 words, */ + /* independent of its length. */ register size_t count = 1; - + CORD_ec_flush_buf(ecord); while ((c = getc(f)) == 0) count++; ecord[0].ec_cord = CORD_cat(ecord[0].ec_cord, CORD_nul(count)); @@ -463,18 +463,18 @@ CORD CORD_from_file_eager(FILE * f) return(CORD_balance(CORD_ec_to_cord(ecord))); } -/* The state maintained for a lazily read file consists primarily */ -/* of a large direct-mapped cache of previously read values. */ -/* We could rely more on stdio buffering. That would have 2 */ -/* disadvantages: */ -/* 1) Empirically, not all fseek implementations preserve the */ -/* buffer whenever they could. */ -/* 2) It would fail if 2 different sections of a long cord */ -/* were being read alternately. */ -/* We do use the stdio buffer for read ahead. */ -/* To guarantee thread safety in the presence of atomic pointer */ -/* writes, cache lines are always replaced, and never modified in */ -/* place. */ +/* The state maintained for a lazily read file consists primarily */ +/* of a large direct-mapped cache of previously read values. */ +/* We could rely more on stdio buffering. That would have 2 */ +/* disadvantages: */ +/* 1) Empirically, not all fseek implementations preserve the */ +/* buffer whenever they could. */ +/* 2) It would fail if 2 different sections of a long cord */ +/* were being read alternately. */ +/* We do use the stdio buffer for read ahead. */ +/* To guarantee thread safety in the presence of atomic pointer */ +/* writes, cache lines are always replaced, and never modified in */ +/* place. */ # define LOG_CACHE_SZ 14 # define CACHE_SZ (1 << LOG_CACHE_SZ) @@ -484,12 +484,12 @@ CORD CORD_from_file_eager(FILE * f) typedef struct { size_t tag; char data[LINE_SZ]; - /* data[i%LINE_SZ] = ith char in file if tag = i/LINE_SZ */ + /* data[i%LINE_SZ] = ith char in file if tag = i/LINE_SZ */ } cache_line; typedef struct { FILE * lf_file; - size_t lf_current; /* Current file pointer value */ + size_t lf_current; /* Current file pointer value */ cache_line * volatile lf_cache[CACHE_SZ/LINE_SZ]; } lf_state; @@ -501,7 +501,7 @@ typedef struct { typedef struct { lf_state * state; - size_t file_pos; /* Position of needed character. */ + size_t file_pos; /* Position of needed character. */ cache_line * new_cache; } refill_data; @@ -515,14 +515,14 @@ refill_data * client_data; size_t line_start = LINE_START(file_pos); size_t line_no = DIV_LINE_SZ(MOD_CACHE_SZ(file_pos)); cache_line * new_cache = client_data -> new_cache; - + if (line_start != state -> lf_current && fseek(f, line_start, SEEK_SET) != 0) { - ABORT("fseek failed"); + ABORT("fseek failed"); } if (fread(new_cache -> data, sizeof(char), LINE_SZ, f) - <= file_pos - line_start) { - ABORT("fread failed"); + <= file_pos - line_start) { + ABORT("fread failed"); } new_cache -> tag = DIV_LINE_SZ(file_pos); /* Store barrier goes here. */ @@ -535,47 +535,50 @@ char CORD_lf_func(size_t i, void * client_data) { register lf_state * state = (lf_state *)client_data; register cache_line * volatile * cl_addr = - &(state -> lf_cache[DIV_LINE_SZ(MOD_CACHE_SZ(i))]); + &(state -> lf_cache[DIV_LINE_SZ(MOD_CACHE_SZ(i))]); register cache_line * cl = (cache_line *)ATOMIC_READ(cl_addr); - + if (cl == 0 || cl -> tag != DIV_LINE_SZ(i)) { - /* Cache miss */ - refill_data rd; - + /* Cache miss */ + refill_data rd; + rd.state = state; rd.file_pos = i; rd.new_cache = GC_NEW_ATOMIC(cache_line); if (rd.new_cache == 0) OUT_OF_MEMORY; return((char)(GC_word) - GC_call_with_alloc_lock((GC_fn_type) refill_cache, &rd)); + GC_call_with_alloc_lock((GC_fn_type) refill_cache, &rd)); } return(cl -> data[MOD_LINE_SZ(i)]); -} +} /*ARGSUSED*/ -void CORD_lf_close_proc(void * obj, void * client_data) +void CORD_lf_close_proc(void * obj, void * client_data) { if (fclose(((lf_state *)obj) -> lf_file) != 0) { - ABORT("CORD_lf_close_proc: fclose failed"); + ABORT("CORD_lf_close_proc: fclose failed"); } -} +} CORD CORD_from_file_lazy_inner(FILE * f, size_t len) { register lf_state * state = GC_NEW(lf_state); register int i; - + if (state == 0) OUT_OF_MEMORY; if (len != 0) { - /* Dummy read to force buffer allocation. */ - /* This greatly increases the probability */ - /* of avoiding deadlock if buffer allocation */ - /* is redirected to GC_malloc and the */ - /* world is multithreaded. */ - char buf[1]; - - (void) fread(buf, 1, 1, f); - rewind(f); + /* Dummy read to force buffer allocation. */ + /* This greatly increases the probability */ + /* of avoiding deadlock if buffer allocation */ + /* is redirected to GC_malloc and the */ + /* world is multithreaded. */ + char buf[1]; + + if (fread(buf, 1, 1, f) > 1) { + /* Just to suppress "unused result" compiler warning. */ + ABORT("fread unexpected result"); + } + rewind(f); } state -> lf_file = f; for (i = 0; i < CACHE_SZ/LINE_SZ; i++) { @@ -589,7 +592,7 @@ CORD CORD_from_file_lazy_inner(FILE * f, size_t len) CORD CORD_from_file_lazy(FILE * f) { register long len; - + if (fseek(f, 0l, SEEK_END) != 0) { ABORT("Bad fd argument - fseek failed"); } @@ -605,7 +608,7 @@ CORD CORD_from_file_lazy(FILE * f) CORD CORD_from_file(FILE * f) { register long len; - + if (fseek(f, 0l, SEEK_END) != 0) { ABORT("Bad fd argument - fseek failed"); } |