diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2023-03-17 15:22:54 +0200 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2023-03-17 15:22:54 +0200 |
commit | 428e9bffb9a3410452d76b142b24d6e8d48397ab (patch) | |
tree | e092d7025d4eeffc01a67d68ea39e53a244768dd | |
parent | 2e18b77f5b6926e6616ce22d2d3e6d511de69c9b (diff) | |
download | gawk-428e9bffb9a3410452d76b142b24d6e8d48397ab.tar.gz |
Update split() for CSV data.
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | field.c | 14 | ||||
-rw-r--r-- | pc/ChangeLog | 4 | ||||
-rw-r--r-- | pc/Makefile.tst | 9 | ||||
-rw-r--r-- | test/ChangeLog | 5 | ||||
-rw-r--r-- | test/Makefile.am | 7 | ||||
-rw-r--r-- | test/Makefile.in | 12 | ||||
-rw-r--r-- | test/Maketests | 5 | ||||
-rw-r--r-- | test/csv2.awk | 34 | ||||
-rw-r--r-- | test/csv2.ok | 0 |
10 files changed, 76 insertions, 18 deletions
@@ -1,3 +1,7 @@ +2023-03-17 Arnold D. Robbins <arnold@skeeve.com> + + * field.c (do_split): Modify behavior for --csv. + 2023-03-16 Arnold D. Robbins <arnold@skeeve.com> * awk.h (enum do_flag_values): Add DO_CSV. @@ -1173,7 +1173,10 @@ do_split(int nargs) if ((sep->flags & REGEX) != 0) sep = sep->typed_re; - if ( (sep->re_flags & FS_DFLT) != 0 + if (do_csv && (sep->re_flags & FS_DFLT) != 0 && nargs == 3) { + fs = NULL; + parseit = comma_parse_field; + } else if ( (sep->re_flags & FS_DFLT) != 0 && current_field_sep() == Using_FS && ! RS_is_null) { parseit = parse_field; @@ -1194,15 +1197,6 @@ do_split(int nargs) } else if (fs->stlen == 1 && (sep->re_flags & CONSTANT) == 0) { if (fs->stptr[0] == ' ') { parseit = def_parse_field; - } else if (fs->stptr[0] == ',' && ! do_posix) { - static bool warned = false; - - parseit = comma_parse_field; - - if (do_lint && ! warned) { - warned = true; - lintwarn(_("split: CSV parsing is a non-standard extension")); - } } else parseit = sc_parse_field; } else { diff --git a/pc/ChangeLog b/pc/ChangeLog index 541be9c5..12c14c72 100644 --- a/pc/ChangeLog +++ b/pc/ChangeLog @@ -1,3 +1,7 @@ +2023-03-17 Arnold D. Robbins <arnold@skeeve.com> + + * Makefile.tst: Regenerated. + 2023-03-16 Arnold D. Robbins <arnold@skeeve.com> * Makefile.tst: Regenerated. diff --git a/pc/Makefile.tst b/pc/Makefile.tst index 72f8a9cb..7228004b 100644 --- a/pc/Makefile.tst +++ b/pc/Makefile.tst @@ -190,7 +190,7 @@ GAWK_EXT_TESTS = \ aadelete1 aadelete2 aarray1 aasort aasorti argtest arraysort \ arraysort2 arraytype asortbool backw badargs beginfile1 beginfile2 \ binmode1 charasbytes clos1way clos1way2 clos1way3 clos1way4 \ - clos1way5 clos1way6 colonwarn commas crlf csv1 dbugeval dbugeval2 \ + clos1way5 clos1way6 colonwarn commas crlf csv1 csv2 dbugeval dbugeval2 \ dbugeval3 dbugeval4 dbugtypedre1 dbugtypedre2 delsub \ devfd devfd1 devfd2 dfacheck1 dumpvars \ elemnew1 elemnew2 elemnew3 errno exit fieldwdth forcenum fpat1 fpat2 \ @@ -292,7 +292,7 @@ NEED_TRADITIONAL = litoct tradanch rscompat NEED_PMA = pma # List of tests that need --csv -NEED_CSV = csv1 +NEED_CSV = csv1 csv2 # Lists of tests that run a shell script RUN_SHELL = exit fflush localenl modifiers next randtest rtlen rtlen01 @@ -2727,6 +2727,11 @@ csv1: @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --csv < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +csv2: + @echo $@ + @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --csv >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + dbugeval2: @echo $@ @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --debug < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/ChangeLog b/test/ChangeLog index f247c540..989d105b 100644 --- a/test/ChangeLog +++ b/test/ChangeLog @@ -1,3 +1,8 @@ +2023-03-17 Arnold D. Robbins <arnold@skeeve.com> + + * Makefile.am (EXTRA_DIST, NEED_CSV, GAWK_EXT_TESTS): New test, csv2. + * csv2.awk, csv2.ok: New files. + 2023-03-16 Arnold D. Robbins <arnold@skeeve.com> * Gentests: Handle NEED_CSV. diff --git a/test/Makefile.am b/test/Makefile.am index 7bb4c983..e0be391c 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -225,7 +225,8 @@ EXTRA_DIST = \ crlf.ok \ csv1.awk \ csv1.in \ - csv1.ok \ + csv2.awk \ + csv2.ok \ datanonl.awk \ datanonl.in \ datanonl.ok \ @@ -1506,7 +1507,7 @@ GAWK_EXT_TESTS = \ aadelete1 aadelete2 aarray1 aasort aasorti argtest arraysort \ arraysort2 arraytype asortbool backw badargs beginfile1 beginfile2 \ binmode1 charasbytes clos1way clos1way2 clos1way3 clos1way4 \ - clos1way5 clos1way6 colonwarn commas crlf csv1 dbugeval dbugeval2 \ + clos1way5 clos1way6 colonwarn commas crlf csv1 csv2 dbugeval dbugeval2 \ dbugeval3 dbugeval4 dbugtypedre1 dbugtypedre2 delsub \ devfd devfd1 devfd2 dfacheck1 dumpvars \ elemnew1 elemnew2 elemnew3 errno exit fieldwdth forcenum fpat1 fpat2 \ @@ -1608,7 +1609,7 @@ NEED_TRADITIONAL = litoct tradanch rscompat NEED_PMA = pma # List of tests that need --csv -NEED_CSV = csv1 +NEED_CSV = csv1 csv2 # Lists of tests that run a shell script RUN_SHELL = exit fflush localenl modifiers next randtest rtlen rtlen01 diff --git a/test/Makefile.in b/test/Makefile.in index 28c5ebc5..ce2c7744 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -489,7 +489,8 @@ EXTRA_DIST = \ crlf.ok \ csv1.awk \ csv1.in \ - csv1.ok \ + csv2.awk \ + csv2.ok \ datanonl.awk \ datanonl.in \ datanonl.ok \ @@ -1770,7 +1771,7 @@ GAWK_EXT_TESTS = \ aadelete1 aadelete2 aarray1 aasort aasorti argtest arraysort \ arraysort2 arraytype asortbool backw badargs beginfile1 beginfile2 \ binmode1 charasbytes clos1way clos1way2 clos1way3 clos1way4 \ - clos1way5 clos1way6 colonwarn commas crlf csv1 dbugeval dbugeval2 \ + clos1way5 clos1way6 colonwarn commas crlf csv1 csv2 dbugeval dbugeval2 \ dbugeval3 dbugeval4 dbugtypedre1 dbugtypedre2 delsub \ devfd devfd1 devfd2 dfacheck1 dumpvars \ elemnew1 elemnew2 elemnew3 errno exit fieldwdth forcenum fpat1 fpat2 \ @@ -1872,7 +1873,7 @@ NEED_TRADITIONAL = litoct tradanch rscompat NEED_PMA = pma # List of tests that need --csv -NEED_CSV = csv1 +NEED_CSV = csv1 csv2 # Lists of tests that run a shell script RUN_SHELL = exit fflush localenl modifiers next randtest rtlen rtlen01 @@ -4490,6 +4491,11 @@ csv1: @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --csv < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +csv2: + @echo $@ + @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --csv >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + dbugeval2: @echo $@ @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --debug < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/Maketests b/test/Maketests index d284aab6..ed1a050e 100644 --- a/test/Maketests +++ b/test/Maketests @@ -1417,6 +1417,11 @@ csv1: @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --csv < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ +csv2: + @echo $@ + @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --csv >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ + @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@ + dbugeval2: @echo $@ @-AWKPATH="$(srcdir)" $(AWK) -f $@.awk --debug < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@ diff --git a/test/csv2.awk b/test/csv2.awk new file mode 100644 index 00000000..93cbca65 --- /dev/null +++ b/test/csv2.awk @@ -0,0 +1,34 @@ +BEGIN { + text[1] = "a"; expected[1] = 1 + text[2] = " a"; expected[2] = 1 + text[3] = ",a"; expected[3] = 2 + text[4] = " , a"; expected[4] = 2 + text[5] = "a,b"; expected[5] = 2 + text[6] = "a,b,c"; expected[6] = 3 + text[7] = "\"\""; expected[7] = 1 + text[8] = "\"abc\""; expected[8] = 1 + text[9] = "\"a\"\"b\""; expected[9] = 1 + text[10] = "\"a\",\"b\""; expected[10] = 2 + text[11] = "a\"\"b"; expected[11] = 1 + text[12] = "\"a,b\""; expected[12] = 1 + text[13] = "\"\"\"\""; expected[13] = 1 + text[14] = "\"\"\"\"\"\""; expected[14] = 1 + text[15] = "\"\"\"x\"\"\""; expected[15] = 1 + text[16] = ",,\"\""; expected[16] = 3 + text[17] = "a\"\"b"; expected[17] = 1 + text[18] = "a\"b"; expected[18] = 1 + text[19] = "a''b"; expected[19] = 1 + text[20] = "\"abc"; expected[20] = 1 + text[21] = ",,"; expected[21] = 3 + text[22] = "a,"; expected[22] = 2 + text[23] = "\"\","; expected[23] = 2 + text[24] = ","; expected[24] = 2 + text[25] = "\"abc\",def"; expected[25] = 2 + + for (i = 1; i <= length(text); i++) { + n = split(text[i], array) + if (n != expected[i]) + printf("text[%d] = <%s>, expected %d, got %d\n", + i, text[i], expected[i], n) + } +} diff --git a/test/csv2.ok b/test/csv2.ok new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/test/csv2.ok |