summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Arnold D. Robbins <arnold@skeeve.com> 2023-03-18 22:22:31 +0200
committer: Arnold D. Robbins <arnold@skeeve.com> 2023-03-18 22:22:31 +0200
commit: 29e74d75768f25cd6b0592d21c029436a57b18e7 (patch)
tree: 95b4d1f0e49a044b475d6522998dd43295c7ee45
parent: 11dd1f9cc4f36ab2ae0f56ee54832781b61bdaa9 (diff)
download: gawk-29e74d75768f25cd6b0592d21c029436a57b18e7.tar.gz
Allow input records longer than INT_MAX.
-rw-r--r-- ChangeLog 15
-rw-r--r-- awk.h 2
-rw-r--r-- field.c 6
-rw-r--r-- io.c 51
4 files changed, 46 insertions, 28 deletions
diff --git a/ChangeLog b/ChangeLog
index c1d22b94..732d8af5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2023-03-18 Miguel Pineiro Jr. <mpj@pineiro.cc>
+
+ Allow records longer than INT_MAX. For test cases,
+ see https://lists.gnu.org/archive/html/bug-gawk/2021-05/msg00003.html.
+
+ * awk.h (set_record): `cnt' is now size_t.
+ * field.c (set_record): `cnt' is now size_t. Adjust databuf_size
+ and MAX_SIZE macro.
+ * io.c (get_a_record): Add new second parameter, size_t *len,
+ which holds the size. Adjust return value meaning. Deal with
+ return from an extension's get_a_record() function.
+ (do_getline_redir): Adjust calling get_a_record().
+ (do_getline): Ditto.
+ (inrec): Ditto.
+
2023-03-09 Arnold D. Robbins <arnold@skeeve.com>
* gawkapi.h: Update copyright year. Small edit in leading comment.
diff --git a/awk.h b/awk.h
index 661eb637..afe90f0a 100644
--- a/awk.h
+++ b/awk.h
@@ -1569,7 +1569,7 @@ extern NODE *get_actual_argument(NODE *, int, bool);
#endif
/* field.c */
extern void init_fields(void);
-extern void set_record(const char *buf, int cnt, const awk_fieldwidth_info_t *);
+extern void set_record(const char *buf, size_t cnt, const awk_fieldwidth_info_t *);
extern void reset_record(void);
extern void rebuild_record(void);
extern void set_NF(void);
diff --git a/field.c b/field.c
index 343a3100..9ff37582 100644
--- a/field.c
+++ b/field.c
@@ -261,13 +261,13 @@ rebuild_record()
* but better correct than fast.
*/
void
-set_record(const char *buf, int cnt, const awk_fieldwidth_info_t *fw)
+set_record(const char *buf, size_t cnt, const awk_fieldwidth_info_t *fw)
{
NODE *n;
static char *databuf;
- static unsigned long databuf_size;
+ static size_t databuf_size;
#define INITIAL_SIZE 512
-#define MAX_SIZE ((unsigned long) ~0) /* maximally portable ... */
+#define MAX_SIZE ((size_t) ~0) /* maximally portable ... */
purge_record();
diff --git a/io.c b/io.c
index dcbec287..c1057f96 100644
--- a/io.c
+++ b/io.c
@@ -267,7 +267,7 @@ static RECVALUE rsrescan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state);
static RECVALUE (*matchrec)(IOBUF *iop, struct recmatch *recm, SCANSTATE *state) = rs1scan;
-static int get_a_record(char **out, IOBUF *iop, int *errcode, const awk_fieldwidth_info_t **field_width);
+static int get_a_record(char **out, size_t *len, IOBUF *iop, int *errcode, const awk_fieldwidth_info_t **field_width);
static void free_rp(struct redirect *rp);
@@ -565,21 +565,19 @@ bool
inrec(IOBUF *iop, int *errcode)
{
char *begin;
- int cnt;
- bool retval = true;
+ size_t cnt;
+ bool retval;
const awk_fieldwidth_info_t *field_width = NULL;
if (at_eof(iop) && no_data_left(iop))
- cnt = EOF;
+ retval = false;
else if ((iop->flag & IOP_CLOSED) != 0)
- cnt = EOF;
+ retval = false;
else
- cnt = get_a_record(& begin, iop, errcode, & field_width);
+ /* Note that get_a_record may return -2 when I/O would block */
+ retval = (get_a_record(& begin, & cnt, iop, errcode, & field_width) == 0);
- /* Note that get_a_record may return -2 when I/O would block */
- if (cnt < 0) {
- retval = false;
- } else {
+ if (retval) {
INCREMENT_REC(NR);
INCREMENT_REC(FNR);
set_record(begin, cnt, field_width);
@@ -2808,7 +2806,8 @@ do_getline_redir(int into_variable, enum redirval redirtype)
{
struct redirect *rp = NULL;
IOBUF *iop;
- int cnt = EOF;
+ size_t cnt;
+ int retval = EOF;
char *s = NULL;
int errcode;
NODE *redir_exp = NULL;
@@ -2843,14 +2842,14 @@ do_getline_redir(int into_variable, enum redirval redirtype)
return make_number((AWKNUM) 0.0);
errcode = 0;
- cnt = get_a_record(& s, iop, & errcode, (lhs ? NULL : & field_width));
+ retval = get_a_record(& s, & cnt, iop, & errcode, (lhs ? NULL : & field_width));
if (errcode != 0) {
if (! do_traditional && (errcode != -1))
update_ERRNO_int(errcode);
- return make_number((AWKNUM) cnt);
+ return make_number((AWKNUM) retval);
}
- if (cnt == EOF) {
+ if (retval == EOF) {
/*
* Don't do iop_close() here if we are
* reading from a pipe; otherwise
@@ -2882,7 +2881,8 @@ do_getline_redir(int into_variable, enum redirval redirtype)
NODE *
do_getline(int into_variable, IOBUF *iop)
{
- int cnt = EOF;
+ size_t cnt;
+ int retval = EOF;
char *s = NULL;
int errcode;
const awk_fieldwidth_info_t *field_width = NULL;
@@ -2894,16 +2894,16 @@ do_getline(int into_variable, IOBUF *iop)
}
errcode = 0;
- cnt = get_a_record(& s, iop, & errcode, (into_variable ? NULL : & field_width));
+ retval = get_a_record(& s, & cnt, iop, & errcode, (into_variable ? NULL : & field_width));
if (errcode != 0) {
if (! do_traditional && (errcode != -1))
update_ERRNO_int(errcode);
if (into_variable)
(void) POP_ADDRESS();
- return make_number((AWKNUM) cnt);
+ return make_number((AWKNUM) retval);
}
- if (cnt == EOF)
+ if (retval == EOF)
return NULL; /* try next file */
INCREMENT_REC(NR);
INCREMENT_REC(FNR);
@@ -3851,13 +3851,14 @@ errno_io_retry(void)
/*
* get_a_record --- read a record from IOP into out,
- * return length or EOF, set RT.
+ * its length into len, and set RT.
+ * return 0 on success, EOF when out of data, and -2 if I/O would block.
* Note that errcode is never NULL, and the caller initializes *errcode to 0.
- * If I/O would block, return -2.
*/
static int
get_a_record(char **out, /* pointer to pointer to data */
+ size_t *len, /* pointer to record length */
IOBUF *iop, /* input IOP */
int *errcode, /* pointer to error variable */
const awk_fieldwidth_info_t **field_width)
@@ -3866,7 +3867,6 @@ get_a_record(char **out, /* pointer to pointer to data */
struct recmatch recm;
SCANSTATE state;
RECVALUE ret;
- int retval;
NODE *rtval = NULL;
static RECVALUE (*lastmatchrec)(IOBUF *iop, struct recmatch *recm, SCANSTATE *state) = NULL;
@@ -3885,6 +3885,9 @@ get_a_record(char **out, /* pointer to pointer to data */
if (rc == EOF)
iop->flag |= IOP_AT_EOF;
else {
+ assert(rc >= 0);
+ *len = rc;
+ rc = 0;
if (rt_len != 0)
set_RT(rt_start, rt_len);
else
@@ -4044,11 +4047,11 @@ get_a_record(char **out, /* pointer to pointer to data */
if (recm.len == 0) {
*out = NULL;
- retval = 0;
+ *len = 0;
} else {
assert(recm.start != NULL);
*out = recm.start;
- retval = recm.len;
+ *len = recm.len;
}
iop->off += recm.len + recm.rt_len;
@@ -4056,7 +4059,7 @@ get_a_record(char **out, /* pointer to pointer to data */
if (recm.len == 0 && recm.rt_len == 0 && at_eof(iop))
return EOF;
else
- return retval;
+ return 0;
}
/* set_RS --- update things as appropriate when RS is set */