summaryrefslogtreecommitdiff
path: root/io.c
diff options
context:
space:
mode:
Diffstat (limited to 'io.c')
-rw-r--r--io.c108
1 files changed, 103 insertions, 5 deletions
diff --git a/io.c b/io.c
index dcbec287..dccf3952 100644
--- a/io.c
+++ b/io.c
@@ -260,10 +260,12 @@ static void find_input_parser(IOBUF *iop);
static bool find_output_wrapper(awk_output_buf_t *outbuf);
static void init_output_wrapper(awk_output_buf_t *outbuf);
static bool find_two_way_processor(const char *name, struct redirect *rp);
+static bool avoid_flush(const char *name);
static RECVALUE rs1scan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state);
static RECVALUE rsnullscan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state);
static RECVALUE rsrescan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state);
+static RECVALUE csvscan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state);
static RECVALUE (*matchrec)(IOBUF *iop, struct recmatch *recm, SCANSTATE *state) = rs1scan;
@@ -340,6 +342,15 @@ init_io()
read_can_timeout = true;
}
+/*
+ * init_csv_records --- set up for CSV handling
+ *
+ * If do_csv is set, install csvscan() as the record scanner by
+ * storing it in the file-level matchrec function pointer.
+ * If do_csv is not set, matchrec is left as it was.
+ */
+
+void
+init_csv_records(void)
+{
+ if (do_csv)
+ matchrec = csvscan;
+}
+
#if defined(__MINGW32__) || defined(__CYGWIN__)
/* binmode --- convert BINMODE to string for fopen */
@@ -951,7 +962,11 @@ redirect_string(const char *str, size_t explen, bool not_string,
/* set close-on-exec */
os_close_on_exec(fileno(rp->output.fp), str, "pipe", "to");
- rp->flag |= RED_FLUSH;
+
+ // Allow the user to say they don't want pipe output
+ // to be flushed all the time.
+ if (! avoid_flush(str))
+ rp->flag |= RED_FLUSH;
break;
case redirect_pipein:
if (extfd >= 0) {
@@ -3815,6 +3830,56 @@ find_longest_terminator:
return REC_OK;
}
+/*
+ * csvscan --- handle --csv mode
+ *
+ * Record scanner for CSV input. A record ends at the first newline
+ * that is not inside double quotes; a newline seen while in_quote is
+ * true is treated as data and scanning continues past it.
+ *
+ * iop:   input buffer. Reads iop->off, iop->dataend and iop->scanoff;
+ *        writes a newline sentinel at *iop->dataend, may shrink
+ *        iop->dataend by one for each CR removed, and saves the scan
+ *        position in iop->scanoff when no terminator is found.
+ * recm:  zeroed on entry, then filled in: start and len always,
+ *        rt_start and rt_len (1) only when a terminator was found.
+ * state: NOSTATE means a new record (quote tracking is reset);
+ *        INDATA means resume at iop->off + iop->scanoff.
+ *        Set to NOSTATE on success and to INDATA otherwise.
+ *
+ * Returns REC_OK when a terminating newline was found before
+ * iop->dataend, NOTERM when the scan reached iop->dataend first.
+ */
+
+static RECVALUE
+csvscan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state)
+{
+ char *bp;
+ char rs = '\n';
+ static bool in_quote = false; /* static: kept between calls so that a quoted
+                               * field can continue after a NOTERM return */
+
+ memset(recm, '\0', sizeof(struct recmatch));
+ *(iop->dataend) = rs; /* set sentinel so the inner scan loop below always stops */
+ recm->start = iop->off; /* beginning of record */
+
+ if (*state == NOSTATE) /* reset in_quote at the beginning of the record */
+ in_quote = false;
+
+ bp = iop->off;
+ if (*state == INDATA) /* skip over data we've already seen */
+ bp += iop->scanoff;
+
+ /*
+ * look for a newline outside quotes
+ *
+ * The inner loop runs to the next newline (real or sentinel) and
+ * flips in_quote on every double quote character it passes.
+ */
+ do {
+ while (*bp != rs) {
+ if (*bp == '\"')
+ in_quote = ! in_quote;
+ bp++;
+ }
+ if (bp > iop->off && bp[-1] == '\r') {
+ /*
+ * convert CR-LF to LF by shifting the record
+ *
+ * Everything from the newline up to dataend moves down one
+ * byte over the CR, then dataend and bp step back to match.
+ *
+ * NOTE(review): the sentinel is not rewritten at the new
+ * dataend after the shift; confirm a later pass of the inner
+ * loop cannot run beyond dataend.
+ * NOTE(review): this test also fires when the newline found
+ * is the sentinel itself, so a CR that is the last byte in
+ * the buffer is removed too; confirm that is intended.
+ */
+ memmove(bp - 1, bp, iop->dataend - bp);
+ iop->dataend--;
+ bp--;
+ }
+ } while (in_quote && bp < iop->dataend && bp++); /* the trailing bp++ only steps past
+                                                  * a newline that was inside quotes */
+
+ /* set len to what we have so far, in case this is all there is */
+ recm->len = bp - recm->start;
+
+ if (bp < iop->dataend) { /* found it in the buffer */
+ recm->rt_start = bp;
+ recm->rt_len = 1;
+ *state = NOSTATE;
+ return REC_OK;
+ } else {
+ *state = INDATA;
+ iop->scanoff = bp - iop->off;
+ return NOTERM;
+ }
+}
+
/* retryable --- return true if PROCINFO[<filename>, "RETRY"] exists */
static inline int
@@ -4064,6 +4129,13 @@ get_a_record(char **out, /* pointer to pointer to data */
void
set_RS()
{
+ /*
+ * Setting RS does nothing if CSV mode, warn in that case,
+ * but don't warn on first call which happens at initialization.
+ */
+ static bool first_time = true;
+ static bool warned = false;
+
static NODE *save_rs = NULL;
/*
@@ -4094,9 +4166,18 @@ set_RS()
refree(RS_re[1]);
RS_re[0] = RS_re[1] = RS_regexp = NULL;
+ if (! first_time && do_csv) {
+ if (! warned) {
+ warned = true;
+ warning(_("assignment to RS has no effect when using --csv"));
+ }
+ return;
+ }
+
if (RS->stlen == 0) {
RS_is_null = true;
- matchrec = rsnullscan;
+ if (first_time || ! do_csv)
+ matchrec = rsnullscan;
} else if ((RS->stlen > 1 || (RS->flags & REGEX) != 0) && ! do_traditional) {
static bool warned = false;
@@ -4104,17 +4185,23 @@ set_RS()
RS_re[1] = make_regexp(RS->stptr, RS->stlen, true, true, true);
RS_regexp = RS_re[IGNORECASE];
- matchrec = rsrescan;
+ if (first_time || ! do_csv)
+ matchrec = rsrescan;
if (do_lint_extensions && ! warned) {
lintwarn(_("multicharacter value of `RS' is a gawk extension"));
warned = true;
}
- } else
- matchrec = rs1scan;
+ } else {
+ if (first_time || ! do_csv)
+ matchrec = rs1scan;
+ }
set_FS:
if (current_field_sep() == Using_FS)
set_FS();
+
+ if (first_time)
+ first_time = false;
}
@@ -4486,3 +4573,14 @@ init_output_wrapper(awk_output_buf_t *outbuf)
outbuf->gawk_ferror = gawk_ferror;
outbuf->gawk_fclose = gawk_fclose;
}
+
+/*
+ * avoid_flush --- return true if should not flush a pipe every time
+ *
+ * name: string used as the first key of the second in_PROCINFO() lookup.
+ *
+ * Returns true when in_PROCINFO() finds an entry for BUFFERPIPE alone,
+ * or for the pair (name, BUFFERPIPE); returns false otherwise.
+ * Presumably these correspond to PROCINFO[BUFFERPIPE] and
+ * PROCINFO[name, BUFFERPIPE] in the awk program; verify against
+ * in_PROCINFO().
+ */
+
+static bool
+avoid_flush(const char *name)
+{
+ static const char bufferpipe[] = "BUFFERPIPE";
+
+ return in_PROCINFO(bufferpipe, NULL, NULL) != NULL
+ || in_PROCINFO(name, bufferpipe, NULL) != NULL;
+}