diff options
Diffstat (limited to 'io.c')
-rw-r--r-- | io.c | 108 |
1 files changed, 103 insertions, 5 deletions
@@ -260,10 +260,12 @@ static void find_input_parser(IOBUF *iop); static bool find_output_wrapper(awk_output_buf_t *outbuf); static void init_output_wrapper(awk_output_buf_t *outbuf); static bool find_two_way_processor(const char *name, struct redirect *rp); +static bool avoid_flush(const char *name); static RECVALUE rs1scan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state); static RECVALUE rsnullscan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state); static RECVALUE rsrescan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state); +static RECVALUE csvscan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state); static RECVALUE (*matchrec)(IOBUF *iop, struct recmatch *recm, SCANSTATE *state) = rs1scan; @@ -340,6 +342,15 @@ init_io() read_can_timeout = true; } +/* init_csv_records --- set up for CSV handling */ + +void +init_csv_records(void) +{ + if (do_csv) + matchrec = csvscan; +} + #if defined(__MINGW32__) || defined(__CYGWIN__) /* binmode --- convert BINMODE to string for fopen */ @@ -951,7 +962,11 @@ redirect_string(const char *str, size_t explen, bool not_string, /* set close-on-exec */ os_close_on_exec(fileno(rp->output.fp), str, "pipe", "to"); - rp->flag |= RED_FLUSH; + + // Allow the user to say they don't want pipe output + // to be flushed all the time. + if (! avoid_flush(str)) + rp->flag |= RED_FLUSH; break; case redirect_pipein: if (extfd >= 0) { @@ -3815,6 +3830,56 @@ find_longest_terminator: return REC_OK; } +/* csvscan --- handle --csv mode */ + +static RECVALUE +csvscan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state) +{ + char *bp; + char rs = '\n'; + static bool in_quote = false; + + memset(recm, '\0', sizeof(struct recmatch)); + *(iop->dataend) = rs; /* set sentinel */ + recm->start = iop->off; /* beginning of record */ + + if (*state == NOSTATE) /* reset in_quote at the beginning of the record */ + in_quote = false; + + bp = iop->off; + if (*state == INDATA) /* skip over data we've already seen */ + bp += iop->scanoff; + + /* look for a newline outside quotes */ + do { + while (*bp != rs) { + if (*bp == '\"') + in_quote = ! in_quote; + bp++; + } + if (bp > iop->off && bp[-1] == '\r') { + // convert CR-LF to LF by shifting the record + memmove(bp - 1, bp, iop->dataend - bp); + iop->dataend--; + bp--; + } + } while (in_quote && bp < iop->dataend && bp++); + + /* set len to what we have so far, in case this is all there is */ + recm->len = bp - recm->start; + + if (bp < iop->dataend) { /* found it in the buffer */ + recm->rt_start = bp; + recm->rt_len = 1; + *state = NOSTATE; + return REC_OK; + } else { + *state = INDATA; + iop->scanoff = bp - iop->off; + return NOTERM; + } +} + /* retryable --- return true if PROCINFO[<filename>, "RETRY"] exists */ static inline int @@ -4064,6 +4129,13 @@ get_a_record(char **out, /* pointer to pointer to data */ void set_RS() { + /* + * Setting RS does nothing if CSV mode, warn in that case, + * but don't warn on first call which happens at initialization. + */ + static bool first_time = true; + static bool warned = false; + static NODE *save_rs = NULL; /* @@ -4094,9 +4166,18 @@ set_RS() refree(RS_re[1]); RS_re[0] = RS_re[1] = RS_regexp = NULL; + if (! first_time && do_csv) { + if (! warned) { + warned = true; + warning(_("assignment to RS has no effect when using --csv")); + } + return; + } + if (RS->stlen == 0) { RS_is_null = true; - matchrec = rsnullscan; + if (first_time || ! do_csv) + matchrec = rsnullscan; } else if ((RS->stlen > 1 || (RS->flags & REGEX) != 0) && ! do_traditional) { static bool warned = false; @@ -4104,17 +4185,23 @@ set_RS() RS_re[1] = make_regexp(RS->stptr, RS->stlen, true, true, true); RS_regexp = RS_re[IGNORECASE]; - matchrec = rsrescan; + if (first_time || ! do_csv) + matchrec = rsrescan; if (do_lint_extensions && ! warned) { lintwarn(_("multicharacter value of `RS' is a gawk extension")); warned = true; } - } else - matchrec = rs1scan; + } else { + if (first_time || ! do_csv) + matchrec = rs1scan; + } set_FS: if (current_field_sep() == Using_FS) set_FS(); + + if (first_time) + first_time = false; } @@ -4486,3 +4573,14 @@ init_output_wrapper(awk_output_buf_t *outbuf) outbuf->gawk_ferror = gawk_ferror; outbuf->gawk_fclose = gawk_fclose; } + +/* avoid_flush --- return true if should not flush a pipe every time */ + +static bool +avoid_flush(const char *name) +{ + static const char bufferpipe[] = "BUFFERPIPE"; + + return in_PROCINFO(bufferpipe, NULL, NULL) != NULL + || in_PROCINFO(name, bufferpipe, NULL) != NULL; +} |