summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Arnold D. Robbins <arnold@skeeve.com> 2023-03-17 15:22:54 +0200
committer: Arnold D. Robbins <arnold@skeeve.com> 2023-03-17 15:22:54 +0200
commit: 428e9bffb9a3410452d76b142b24d6e8d48397ab (patch)
tree: e092d7025d4eeffc01a67d68ea39e53a244768dd
parent: 2e18b77f5b6926e6616ce22d2d3e6d511de69c9b (diff)
download: gawk-428e9bffb9a3410452d76b142b24d6e8d48397ab.tar.gz
Update split() for CSV data.
-rw-r--r-- ChangeLog | 4
-rw-r--r-- field.c | 14
-rw-r--r-- pc/ChangeLog | 4
-rw-r--r-- pc/Makefile.tst | 9
-rw-r--r-- test/ChangeLog | 5
-rw-r--r-- test/Makefile.am | 7
-rw-r--r-- test/Makefile.in | 12
-rw-r--r-- test/Maketests | 5
-rw-r--r-- test/csv2.awk | 34
-rw-r--r-- test/csv2.ok | 0
10 files changed, 76 insertions, 18 deletions
diff --git a/ChangeLog b/ChangeLog
index 55d16d3c..99d9fbf9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2023-03-17 Arnold D. Robbins <arnold@skeeve.com>
+
+ * field.c (do_split): Modify behavior for --csv.
+
2023-03-16 Arnold D. Robbins <arnold@skeeve.com>
* awk.h (enum do_flag_values): Add DO_CSV.
diff --git a/field.c b/field.c
index 44c153dc..0f0f5192 100644
--- a/field.c
+++ b/field.c
@@ -1173,7 +1173,10 @@ do_split(int nargs)
if ((sep->flags & REGEX) != 0)
sep = sep->typed_re;
- if ( (sep->re_flags & FS_DFLT) != 0
+ if (do_csv && (sep->re_flags & FS_DFLT) != 0 && nargs == 3) {
+ fs = NULL;
+ parseit = comma_parse_field;
+ } else if ( (sep->re_flags & FS_DFLT) != 0
&& current_field_sep() == Using_FS
&& ! RS_is_null) {
parseit = parse_field;
@@ -1194,15 +1197,6 @@ do_split(int nargs)
} else if (fs->stlen == 1 && (sep->re_flags & CONSTANT) == 0) {
if (fs->stptr[0] == ' ') {
parseit = def_parse_field;
- } else if (fs->stptr[0] == ',' && ! do_posix) {
- static bool warned = false;
-
- parseit = comma_parse_field;
-
- if (do_lint && ! warned) {
- warned = true;
- lintwarn(_("split: CSV parsing is a non-standard extension"));
- }
} else
parseit = sc_parse_field;
} else {
diff --git a/pc/ChangeLog b/pc/ChangeLog
index 541be9c5..12c14c72 100644
--- a/pc/ChangeLog
+++ b/pc/ChangeLog
@@ -1,3 +1,7 @@
+2023-03-17 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.tst: Regenerated.
+
2023-03-16 Arnold D. Robbins <arnold@skeeve.com>
* Makefile.tst: Regenerated.
diff --git a/pc/Makefile.tst b/pc/Makefile.tst
index 72f8a9cb..7228004b 100644
--- a/pc/Makefile.tst
+++ b/pc/Makefile.tst
@@ -190,7 +190,7 @@ GAWK_EXT_TESTS = \
aadelete1 aadelete2 aarray1 aasort aasorti argtest arraysort \
arraysort2 arraytype asortbool backw badargs beginfile1 beginfile2 \
binmode1 charasbytes clos1way clos1way2 clos1way3 clos1way4 \
- clos1way5 clos1way6 colonwarn commas crlf csv1 dbugeval dbugeval2 \
+ clos1way5 clos1way6 colonwarn commas crlf csv1 csv2 dbugeval dbugeval2 \
dbugeval3 dbugeval4 dbugtypedre1 dbugtypedre2 delsub \
devfd devfd1 devfd2 dfacheck1 dumpvars \
elemnew1 elemnew2 elemnew3 errno exit fieldwdth forcenum fpat1 fpat2 \
@@ -292,7 +292,7 @@ NEED_TRADITIONAL = litoct tradanch rscompat
NEED_PMA = pma
# List of tests that need --csv
-NEED_CSV = csv1
+NEED_CSV = csv1 csv2
# Lists of tests that run a shell script
RUN_SHELL = exit fflush localenl modifiers next randtest rtlen rtlen01
@@ -2727,6 +2727,11 @@ csv1:
@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --csv < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+csv2:
+ @echo $@
+ @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --csv >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
dbugeval2:
@echo $@
@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --debug < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/ChangeLog b/test/ChangeLog
index f247c540..989d105b 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,8 @@
+2023-03-17 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (EXTRA_DIST, NEED_CSV, GAWK_EXT_TESTS): New test, csv2.
+ * csv2.awk, csv2.ok: New files.
+
2023-03-16 Arnold D. Robbins <arnold@skeeve.com>
* Gentests: Handle NEED_CSV.
diff --git a/test/Makefile.am b/test/Makefile.am
index 7bb4c983..e0be391c 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -225,7 +225,8 @@ EXTRA_DIST = \
crlf.ok \
csv1.awk \
csv1.in \
- csv1.ok \
+ csv2.awk \
+ csv2.ok \
datanonl.awk \
datanonl.in \
datanonl.ok \
@@ -1506,7 +1507,7 @@ GAWK_EXT_TESTS = \
aadelete1 aadelete2 aarray1 aasort aasorti argtest arraysort \
arraysort2 arraytype asortbool backw badargs beginfile1 beginfile2 \
binmode1 charasbytes clos1way clos1way2 clos1way3 clos1way4 \
- clos1way5 clos1way6 colonwarn commas crlf csv1 dbugeval dbugeval2 \
+ clos1way5 clos1way6 colonwarn commas crlf csv1 csv2 dbugeval dbugeval2 \
dbugeval3 dbugeval4 dbugtypedre1 dbugtypedre2 delsub \
devfd devfd1 devfd2 dfacheck1 dumpvars \
elemnew1 elemnew2 elemnew3 errno exit fieldwdth forcenum fpat1 fpat2 \
@@ -1608,7 +1609,7 @@ NEED_TRADITIONAL = litoct tradanch rscompat
NEED_PMA = pma
# List of tests that need --csv
-NEED_CSV = csv1
+NEED_CSV = csv1 csv2
# Lists of tests that run a shell script
RUN_SHELL = exit fflush localenl modifiers next randtest rtlen rtlen01
diff --git a/test/Makefile.in b/test/Makefile.in
index 28c5ebc5..ce2c7744 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -489,7 +489,8 @@ EXTRA_DIST = \
crlf.ok \
csv1.awk \
csv1.in \
- csv1.ok \
+ csv2.awk \
+ csv2.ok \
datanonl.awk \
datanonl.in \
datanonl.ok \
@@ -1770,7 +1771,7 @@ GAWK_EXT_TESTS = \
aadelete1 aadelete2 aarray1 aasort aasorti argtest arraysort \
arraysort2 arraytype asortbool backw badargs beginfile1 beginfile2 \
binmode1 charasbytes clos1way clos1way2 clos1way3 clos1way4 \
- clos1way5 clos1way6 colonwarn commas crlf csv1 dbugeval dbugeval2 \
+ clos1way5 clos1way6 colonwarn commas crlf csv1 csv2 dbugeval dbugeval2 \
dbugeval3 dbugeval4 dbugtypedre1 dbugtypedre2 delsub \
devfd devfd1 devfd2 dfacheck1 dumpvars \
elemnew1 elemnew2 elemnew3 errno exit fieldwdth forcenum fpat1 fpat2 \
@@ -1872,7 +1873,7 @@ NEED_TRADITIONAL = litoct tradanch rscompat
NEED_PMA = pma
# List of tests that need --csv
-NEED_CSV = csv1
+NEED_CSV = csv1 csv2
# Lists of tests that run a shell script
RUN_SHELL = exit fflush localenl modifiers next randtest rtlen rtlen01
@@ -4490,6 +4491,11 @@ csv1:
@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --csv < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+csv2:
+ @echo $@
+ @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --csv >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
dbugeval2:
@echo $@
@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --debug < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/Maketests b/test/Maketests
index d284aab6..ed1a050e 100644
--- a/test/Maketests
+++ b/test/Maketests
@@ -1417,6 +1417,11 @@ csv1:
@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --csv < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+csv2:
+ @echo $@
+ @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --csv >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
dbugeval2:
@echo $@
@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --debug < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/csv2.awk b/test/csv2.awk
new file mode 100644
index 00000000..93cbca65
--- /dev/null
+++ b/test/csv2.awk
@@ -0,0 +1,34 @@
+BEGIN {
+ text[1] = "a"; expected[1] = 1
+ text[2] = " a"; expected[2] = 1
+ text[3] = ",a"; expected[3] = 2
+ text[4] = " , a"; expected[4] = 2
+ text[5] = "a,b"; expected[5] = 2
+ text[6] = "a,b,c"; expected[6] = 3
+ text[7] = "\"\""; expected[7] = 1
+ text[8] = "\"abc\""; expected[8] = 1
+ text[9] = "\"a\"\"b\""; expected[9] = 1
+ text[10] = "\"a\",\"b\""; expected[10] = 2
+ text[11] = "a\"\"b"; expected[11] = 1
+ text[12] = "\"a,b\""; expected[12] = 1
+ text[13] = "\"\"\"\""; expected[13] = 1
+ text[14] = "\"\"\"\"\"\""; expected[14] = 1
+ text[15] = "\"\"\"x\"\"\""; expected[15] = 1
+ text[16] = ",,\"\""; expected[16] = 3
+ text[17] = "a\"\"b"; expected[17] = 1
+ text[18] = "a\"b"; expected[18] = 1
+ text[19] = "a''b"; expected[19] = 1
+ text[20] = "\"abc"; expected[20] = 1
+ text[21] = ",,"; expected[21] = 3
+ text[22] = "a,"; expected[22] = 2
+ text[23] = "\"\","; expected[23] = 2
+ text[24] = ","; expected[24] = 2
+ text[25] = "\"abc\",def"; expected[25] = 2
+
+ for (i = 1; i <= length(text); i++) {
+ n = split(text[i], array)
+ if (n != expected[i])
+ printf("text[%d] = <%s>, expected %d, got %d\n",
+ i, text[i], expected[i], n)
+ }
+}
diff --git a/test/csv2.ok b/test/csv2.ok
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/test/csv2.ok