diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2023-03-23 20:03:05 +0200 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2023-03-23 20:03:05 +0200 |
commit | 79b94a400aa1fbcf26e0535c1f6345ea7c1fbb54 (patch) | |
tree | 0b5fbe15c20ca4c455d6ecbf67807de4caa5aa6a /doc | |
parent | e7720935594d7442f92eecbe08eb69857aab8c4e (diff) | |
parent | 8bf639a108edfb478f8e0b11d41ddea5d85c7826 (diff) | |
download | gawk-79b94a400aa1fbcf26e0535c1f6345ea7c1fbb54.tar.gz |
Merge branch 'master' into feature/backslash-u
Diffstat (limited to 'doc')
-rw-r--r-- | doc/ChangeLog | 51 | ||||
-rw-r--r-- | doc/Makefile.am | 51 | ||||
-rw-r--r-- | doc/Makefile.in | 100 | ||||
-rw-r--r-- | doc/gawk.info | 1224 | ||||
-rw-r--r-- | doc/gawk.texi | 164 | ||||
-rw-r--r-- | doc/gawk_api-figure1.eps (renamed from doc/api-figure1.eps) | 0 | ||||
-rw-r--r-- | doc/gawk_api-figure1.fig (renamed from doc/api-figure1.fig) | 0 | ||||
-rw-r--r-- | doc/gawk_api-figure1.pdf (renamed from doc/api-figure1.pdf) | bin | 10707 -> 10707 bytes | |||
-rw-r--r-- | doc/gawk_api-figure1.png (renamed from doc/api-figure1.png) | bin | 9183 -> 9183 bytes | |||
-rw-r--r-- | doc/gawk_api-figure1.txt (renamed from doc/api-figure1.txt) | 0 | ||||
-rw-r--r-- | doc/gawk_api-figure2.eps (renamed from doc/api-figure2.eps) | 0 | ||||
-rw-r--r-- | doc/gawk_api-figure2.fig (renamed from doc/api-figure2.fig) | 0 | ||||
-rw-r--r-- | doc/gawk_api-figure2.pdf (renamed from doc/api-figure2.pdf) | bin | 12031 -> 12031 bytes | |||
-rw-r--r-- | doc/gawk_api-figure2.png (renamed from doc/api-figure2.png) | bin | 8983 -> 8983 bytes | |||
-rw-r--r-- | doc/gawk_api-figure2.txt (renamed from doc/api-figure2.txt) | 0 | ||||
-rw-r--r-- | doc/gawk_api-figure3.eps (renamed from doc/api-figure3.eps) | 0 | ||||
-rw-r--r-- | doc/gawk_api-figure3.fig (renamed from doc/api-figure3.fig) | 0 | ||||
-rw-r--r-- | doc/gawk_api-figure3.pdf (renamed from doc/api-figure3.pdf) | bin | 12345 -> 12345 bytes | |||
-rw-r--r-- | doc/gawk_api-figure3.png (renamed from doc/api-figure3.png) | bin | 8860 -> 8860 bytes | |||
-rw-r--r-- | doc/gawk_api-figure3.txt (renamed from doc/api-figure3.txt) | 0 | ||||
-rw-r--r-- | doc/gawk_array-elements.eps (renamed from doc/array-elements.eps) | 0 | ||||
-rw-r--r-- | doc/gawk_array-elements.fig (renamed from doc/array-elements.fig) | 0 | ||||
-rw-r--r-- | doc/gawk_array-elements.pdf (renamed from doc/array-elements.pdf) | bin | 6796 -> 6796 bytes | |||
-rw-r--r-- | doc/gawk_array-elements.png (renamed from doc/array-elements.png) | bin | 6091 -> 6091 bytes | |||
-rw-r--r-- | doc/gawk_array-elements.txt (renamed from doc/array-elements.txt) | 0 | ||||
-rw-r--r-- | doc/gawk_general-program.eps (renamed from doc/general-program.eps) | 0 | ||||
-rw-r--r-- | doc/gawk_general-program.fig (renamed from doc/general-program.fig) | 0 | ||||
-rw-r--r-- | doc/gawk_general-program.pdf (renamed from doc/general-program.pdf) | bin | 5613 -> 5613 bytes | |||
-rw-r--r-- | doc/gawk_general-program.png (renamed from doc/general-program.png) | bin | 6078 -> 6078 bytes | |||
-rw-r--r-- | doc/gawk_general-program.txt (renamed from doc/general-program.txt) | 0 | ||||
-rw-r--r-- | doc/gawk_process-flow.eps (renamed from doc/process-flow.eps) | 0 | ||||
-rw-r--r-- | doc/gawk_process-flow.fig (renamed from doc/process-flow.fig) | 0 | ||||
-rw-r--r-- | doc/gawk_process-flow.pdf (renamed from doc/process-flow.pdf) | bin | 6583 -> 6583 bytes | |||
-rw-r--r-- | doc/gawk_process-flow.png (renamed from doc/process-flow.png) | bin | 7856 -> 7856 bytes | |||
-rw-r--r-- | doc/gawk_process-flow.txt (renamed from doc/process-flow.txt) | 0 | ||||
-rw-r--r-- | doc/gawk_statist.eps (renamed from doc/statist.eps) | 0 | ||||
-rw-r--r-- | doc/gawk_statist.jpg (renamed from doc/statist.jpg) | bin | 20400 -> 20400 bytes | |||
-rw-r--r-- | doc/gawk_statist.pdf (renamed from doc/statist.pdf) | bin | 5313 -> 5313 bytes | |||
-rw-r--r-- | doc/gawk_statist.txt (renamed from doc/statist.txt) | 0 | ||||
-rw-r--r-- | doc/gawkinet.info | 1256 | ||||
-rw-r--r-- | doc/gawkinet.texi | 8 | ||||
-rw-r--r-- | doc/gawktexi.in | 164 | ||||
-rw-r--r-- | doc/gawkworkflow.info | 1016 | ||||
-rw-r--r-- | doc/gawkworkflow.texi | 27 | ||||
-rwxr-xr-x | doc/it/ChangeLog | 25 | ||||
-rw-r--r-- | doc/it/gawk-api-figura1.eps (renamed from doc/it/api-figura1.eps) | 0 | ||||
-rw-r--r-- | doc/it/gawk-api-figura1.fig (renamed from doc/it/api-figura1.fig) | 0 | ||||
-rw-r--r-- | doc/it/gawk-api-figura1.pdf (renamed from doc/it/api-figura1.pdf) | bin | 9120 -> 9120 bytes | |||
-rw-r--r-- | doc/it/gawk-api-figura1.png (renamed from doc/it/api-figura1.png) | bin | 5747 -> 5747 bytes | |||
-rw-r--r-- | doc/it/gawk-api-figura1.txt (renamed from doc/it/api-figura1.txt) | 0 | ||||
-rw-r--r-- | doc/it/gawk-api-figura2.eps (renamed from doc/it/api-figura2.eps) | 0 | ||||
-rw-r--r-- | doc/it/gawk-api-figura2.fig (renamed from doc/it/api-figura2.fig) | 0 | ||||
-rw-r--r-- | doc/it/gawk-api-figura2.pdf (renamed from doc/it/api-figura2.pdf) | bin | 11596 -> 11596 bytes | |||
-rw-r--r-- | doc/it/gawk-api-figura2.png (renamed from doc/it/api-figura2.png) | bin | 5768 -> 5768 bytes | |||
-rw-r--r-- | doc/it/gawk-api-figura2.txt (renamed from doc/it/api-figura2.txt) | 0 | ||||
-rw-r--r-- | doc/it/gawk-api-figura3.eps (renamed from doc/it/api-figura3.eps) | 0 | ||||
-rw-r--r-- | doc/it/gawk-api-figura3.fig (renamed from doc/it/api-figura3.fig) | 0 | ||||
-rw-r--r-- | doc/it/gawk-api-figura3.pdf (renamed from doc/it/api-figura3.pdf) | bin | 11914 -> 11914 bytes | |||
-rw-r--r-- | doc/it/gawk-api-figura3.png (renamed from doc/it/api-figura3.png) | bin | 5734 -> 5734 bytes | |||
-rw-r--r-- | doc/it/gawk-api-figura3.txt (renamed from doc/it/api-figura3.txt) | 0 | ||||
-rw-r--r-- | doc/it/gawk-flusso-elaborazione.eps (renamed from doc/it/flusso-elaborazione.eps) | 0 | ||||
-rw-r--r-- | doc/it/gawk-flusso-elaborazione.fig (renamed from doc/it/flusso-elaborazione.fig) | 0 | ||||
-rw-r--r-- | doc/it/gawk-flusso-elaborazione.pdf (renamed from doc/it/flusso-elaborazione.pdf) | bin | 9672 -> 9672 bytes | |||
-rw-r--r-- | doc/it/gawk-flusso-elaborazione.png (renamed from doc/it/flusso-elaborazione.png) | bin | 6300 -> 6300 bytes | |||
-rw-r--r-- | doc/it/gawk-flusso-elaborazione.txt (renamed from doc/it/flusso-elaborazione.txt) | 0 | ||||
-rw-r--r-- | doc/it/gawk-programma-generico.eps (renamed from doc/it/programma-generico.eps) | 0 | ||||
-rw-r--r-- | doc/it/gawk-programma-generico.fig (renamed from doc/it/programma-generico.fig) | 0 | ||||
-rw-r--r-- | doc/it/gawk-programma-generico.pdf (renamed from doc/it/programma-generico.pdf) | bin | 5313 -> 5313 bytes | |||
-rw-r--r-- | doc/it/gawk-programma-generico.png (renamed from doc/it/programma-generico.png) | bin | 4151 -> 4151 bytes | |||
-rw-r--r-- | doc/it/gawk-programma-generico.txt (renamed from doc/it/programma-generico.txt) | 0 | ||||
-rw-r--r-- | doc/it/gawk-vettore-elementi.eps (renamed from doc/it/vettore-elementi.eps) | 0 | ||||
-rw-r--r-- | doc/it/gawk-vettore-elementi.fig (renamed from doc/it/vettore-elementi.fig) | 0 | ||||
-rw-r--r-- | doc/it/gawk-vettore-elementi.pdf (renamed from doc/it/vettore-elementi.pdf) | bin | 7009 -> 7009 bytes | |||
-rw-r--r-- | doc/it/gawk-vettore-elementi.png (renamed from doc/it/vettore-elementi.png) | bin | 1032 -> 1032 bytes | |||
-rw-r--r-- | doc/it/gawk-vettore-elementi.txt (renamed from doc/it/vettore-elementi.txt) | 0 | ||||
-rwxr-xr-x | doc/it/gawkbug.1 | 84 | ||||
-rwxr-xr-x | doc/it/gawktexi.in | 226 | ||||
-rw-r--r-- | doc/pm-gawk.info | 645 | ||||
-rw-r--r-- | doc/wordlist | 4 |
79 files changed, 2746 insertions, 2299 deletions
diff --git a/doc/ChangeLog b/doc/ChangeLog index 00c0d93a..3df4953d 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,53 @@ +2023-03-09 Arnold D. Robbins <arnold@skeeve.com> + + * gawkworkflow.texi (UPDATE-MONTH, EDITION): Updated. + Copyright, add current year. + (Submitting Your Changes): Add a note about diffs for generated + files also being in the output of `git diff'. Thanks to Manuel + Collado for the suggestion. + (New feature development): Fix the text of a command. + + Unrelated: + + * Makefile.am: Adjust things to get 'make distcheck' to pass. + +2023-03-09 Manuel Collado <mcollado2011@gmail.com> + + * gawktexi.in, gawkinet.texi, *.{png,jpg,eps,pdf,txt,fig}: rename + image file names to ensure a gawk_ prefix (except *flashlight*). + * Makefile.am: Ditto. And install image files for .info and .html + docs. + +2023-03-02 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in (Input Parsers): Clarify and improve some of the + prose, some more. + (Output Wrappers): Ditto. + +2023-02-27 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in (Include Files): Mention that -i and @include are + the same and that files are only included once, even if nested + in multiple include files. + (Input Parsers): Improve discussion of struct stat buf. + +2023-02-26 Arnold D. Robbins <arnold@skeeve.com> + + * Multiple files: Remove trailing whitespace. + +2023-02-25 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in (Input Parsers): Clarify and improve some of the prose. + +2023-02-24 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in (Feature History): Add note about nonfatal I/O. + +2023-02-15 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in (EDITION): Bump to 5.3. Thanks to Antonio + Columbo for the suggestion. + 2023-02-10 Arnold D. Robbins <arnold@skeeve.com> * gawktexi.in (Noflush): New section. @@ -7,7 +57,6 @@ * gawktexi.in (Feature History): Clean up the entry for version 5.3. - 2023-02-05 Arnold D. 
Robbins <arnold@skeeve.com> * texinfo.tex: Update from GNULIB. Only change is to remove diff --git a/doc/Makefile.am b/doc/Makefile.am index 8cb061f7..47f36e05 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -29,27 +29,25 @@ info_TEXINFOS = gawk.texi gawkinet.texi gawkworkflow.texi pm-gawk.texi man_MANS = gawk.1 gawkbug.1 pm-gawk.1 +png_images = gawk_api-figure1.png gawk_api-figure2.png gawk_api-figure3.png \ + gawk_array-elements.png gawk_general-program.png gawk_process-flow.png +html_images = $(png_images) gawk_statist.jpg + +fig_images = $(png_images:%.png=%.fig) +txt_images = $(png_images:%.png=%.txt) gawk_statist.txt +eps_images = $(txt_images:%.txt=%.eps) gawk_statist.eps +pdf_images = $(txt_images:%.txt=%.pdf) gawk_statist.pdf + EXTRA_DIST = ChangeLog ChangeLog.0 ChangeLog.1 \ README.card ad.block setter.outline \ awkcard.in awkforai.txt texinfo.tex cardfonts \ - api-figure1.eps api-figure1.fig api-figure1.pdf \ - api-figure1.png api-figure1.txt \ - api-figure2.eps api-figure2.fig api-figure2.pdf \ - api-figure2.png api-figure2.txt \ - api-figure3.eps api-figure3.fig api-figure3.pdf \ - api-figure3.png api-figure3.txt \ - array-elements.eps array-elements.fig array-elements.pdf \ - array-elements.png array-elements.txt \ + $(fig_images) $(txt_images) $(eps_images) $(pdf_images) \ + $(html_images) \ gawktexi.in sidebar.awk \ - general-program.eps general-program.fig general-program.pdf \ - general-program.png general-program.txt \ it \ - process-flow.eps process-flow.fig process-flow.pdf \ - process-flow.png process-flow.txt \ macros colors no.colors $(man_MANS) \ lflashlight-small.xpic lflashlight.eps lflashlight.pdf \ rflashlight-small.xpic rflashlight.eps rflashlight.pdf \ - statist.jpg statist.eps statist.pdf statist.txt \ wordlist wordlist2 wordlist3 wordlist4 wordlist5 wordlist6 \ bc_notes @@ -159,3 +157,30 @@ spellinet: @echo ==== gawkinet.texi ==== export LC_ALL=C ; spell "$(srcdir)"/gawkinet.texi | \ sort -u | comm -23 - "$(srcdir)"/wordlist4 
+ +# Install/uninstall graphic image files in the info/ dir + +imagedir = $(infodir) +image_DATA = $(html_images) + +# Install/uninstall graphic image files in the html doc dir + +install-html-local: + @$(NORMAL_INSTALL) + @list='$(html_images)'; test -n "$(htmldir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(htmldir)" || exit 1; \ + fi; \ + for p in $$list; do \ + $(INSTALL_DATA) $$p "$(DESTDIR)$(htmldir)" || exit $$?; \ + done; + +uninstall-local: + @$(NORMAL_UNINSTALL) + @list='$(html_images)'; test -n "$(htmldir)" || list=; \ + for p in $$list; do \ + echo " rm -rf '$(DESTDIR)$(htmldir)/$$p'"; \ + rm -rf "$(DESTDIR)$(htmldir)/$$p"; \ + done + diff --git a/doc/Makefile.in b/doc/Makefile.in index 57cea374..edbbca33 100644 --- a/doc/Makefile.in +++ b/doc/Makefile.in @@ -38,6 +38,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA # + VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ @@ -196,7 +197,8 @@ am__can_run_installinfo = \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac -am__installdirs = "$(DESTDIR)$(infodir)" "$(DESTDIR)$(man1dir)" +am__installdirs = "$(DESTDIR)$(infodir)" "$(DESTDIR)$(man1dir)" \ + "$(DESTDIR)$(imagedir)" am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ @@ -227,6 +229,7 @@ am__uninstall_files_from_dir = { \ man1dir = $(mandir)/man1 NROFF = nroff MANS = $(man_MANS) +DATA = $(image_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) am__DIST_COMMON = $(srcdir)/Makefile.in \ $(top_srcdir)/build-aux/texinfo.tex ChangeLog texinfo.tex @@ -364,27 +367,24 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ info_TEXINFOS = gawk.texi gawkinet.texi gawkworkflow.texi pm-gawk.texi man_MANS = gawk.1 gawkbug.1 
pm-gawk.1 +png_images = gawk_api-figure1.png gawk_api-figure2.png gawk_api-figure3.png \ + gawk_array-elements.png gawk_general-program.png gawk_process-flow.png + +html_images = $(png_images) gawk_statist.jpg +fig_images = $(png_images:%.png=%.fig) +txt_images = $(png_images:%.png=%.txt) gawk_statist.txt +eps_images = $(txt_images:%.txt=%.eps) gawk_statist.eps +pdf_images = $(txt_images:%.txt=%.pdf) gawk_statist.pdf EXTRA_DIST = ChangeLog ChangeLog.0 ChangeLog.1 \ README.card ad.block setter.outline \ awkcard.in awkforai.txt texinfo.tex cardfonts \ - api-figure1.eps api-figure1.fig api-figure1.pdf \ - api-figure1.png api-figure1.txt \ - api-figure2.eps api-figure2.fig api-figure2.pdf \ - api-figure2.png api-figure2.txt \ - api-figure3.eps api-figure3.fig api-figure3.pdf \ - api-figure3.png api-figure3.txt \ - array-elements.eps array-elements.fig array-elements.pdf \ - array-elements.png array-elements.txt \ + $(fig_images) $(txt_images) $(eps_images) $(pdf_images) \ + $(html_images) \ gawktexi.in sidebar.awk \ - general-program.eps general-program.fig general-program.pdf \ - general-program.png general-program.txt \ it \ - process-flow.eps process-flow.fig process-flow.pdf \ - process-flow.png process-flow.txt \ macros colors no.colors $(man_MANS) \ lflashlight-small.xpic lflashlight.eps lflashlight.pdf \ rflashlight-small.xpic rflashlight.eps rflashlight.pdf \ - statist.jpg statist.eps statist.pdf statist.txt \ wordlist wordlist2 wordlist3 wordlist4 wordlist5 wordlist6 \ bc_notes @@ -403,6 +403,10 @@ PAPEROPTS = -dpaper=letter -P-pletter # Use this if your troff can correctly handle macros from 'colors' file AWKCARD = awkcard.ps + +# Install/uninstall graphic image files in the info/ dir +imagedir = $(infodir) +image_DATA = $(html_images) all: all-am .SUFFIXES: @@ -639,6 +643,27 @@ uninstall-man1: } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir) 
+install-imageDATA: $(image_DATA) + @$(NORMAL_INSTALL) + @list='$(image_DATA)'; test -n "$(imagedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(imagedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(imagedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(imagedir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(imagedir)" || exit $$?; \ + done + +uninstall-imageDATA: + @$(NORMAL_UNINSTALL) + @list='$(image_DATA)'; test -n "$(imagedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(imagedir)'; $(am__uninstall_files_from_dir) tags TAGS: ctags CTAGS: @@ -683,9 +708,9 @@ distdir-am: $(DISTFILES) dist-info check-am: all-am check: check-am -all-am: Makefile $(INFO_DEPS) $(MANS) +all-am: Makefile $(INFO_DEPS) $(MANS) $(DATA) installdirs: - for dir in "$(DESTDIR)$(infodir)" "$(DESTDIR)$(man1dir)"; do \ + for dir in "$(DESTDIR)$(infodir)" "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(imagedir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am @@ -739,7 +764,7 @@ info: info-am info-am: $(INFO_DEPS) -install-data-am: install-info-am install-man +install-data-am: install-imageDATA install-info-am install-man install-dvi: install-dvi-am @@ -762,7 +787,7 @@ install-exec-am: install-html: install-html-am -install-html-am: $(HTMLS) +install-html-am: $(HTMLS) install-html-local @$(NORMAL_INSTALL) @list='$(HTMLS)'; list2=; test -n "$(htmldir)" || list=; \ if test -n "$$list"; then \ @@ -875,8 +900,9 @@ ps: ps-am ps-am: $(PSS) -uninstall-am: uninstall-dvi-am uninstall-html-am uninstall-info-am \ - uninstall-man uninstall-pdf-am uninstall-ps-am +uninstall-am: uninstall-dvi-am uninstall-html-am uninstall-imageDATA \ + uninstall-info-am uninstall-local uninstall-man \ + uninstall-pdf-am uninstall-ps-am uninstall-man: uninstall-man1 @@ 
-887,15 +913,16 @@ uninstall-man: uninstall-man1 distdir dvi dvi-am html html-am info info-am install \ install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ - install-html-am install-info install-info-am install-man \ - install-man1 install-pdf install-pdf-am install-ps \ - install-ps-am install-strip installcheck installcheck-am \ - installdirs maintainer-clean maintainer-clean-aminfo \ + install-html-am install-html-local install-imageDATA \ + install-info install-info-am install-man install-man1 \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-aminfo \ maintainer-clean-generic mostlyclean mostlyclean-aminfo \ mostlyclean-generic pdf pdf-am pdf-local ps ps-am tags-am \ uninstall uninstall-am uninstall-dvi-am uninstall-html-am \ - uninstall-info-am uninstall-man uninstall-man1 \ - uninstall-pdf-am uninstall-ps-am + uninstall-imageDATA uninstall-info-am uninstall-local \ + uninstall-man uninstall-man1 uninstall-pdf-am uninstall-ps-am .PRECIOUS: Makefile @@ -989,6 +1016,27 @@ spellinet: export LC_ALL=C ; spell "$(srcdir)"/gawkinet.texi | \ sort -u | comm -23 - "$(srcdir)"/wordlist4 +# Install/uninstall graphic image files in the html doc dir + +install-html-local: + @$(NORMAL_INSTALL) + @list='$(html_images)'; test -n "$(htmldir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(htmldir)" || exit 1; \ + fi; \ + for p in $$list; do \ + $(INSTALL_DATA) $$p "$(DESTDIR)$(htmldir)" || exit $$?; \ + done; + +uninstall-local: + @$(NORMAL_UNINSTALL) + @list='$(html_images)'; test -n "$(htmldir)" || list=; \ + for p in $$list; do \ + echo " rm -rf '$(DESTDIR)$(htmldir)/$$p'"; \ + rm -rf "$(DESTDIR)$(htmldir)/$$p"; \ + done + # Tell versions [3.59,3.63) of GNU make to not export all variables. 
# Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/doc/gawk.info b/doc/gawk.info index 9ffe2a86..1cc523c5 100644 --- a/doc/gawk.info +++ b/doc/gawk.info @@ -4,7 +4,7 @@ Copyright © 1989, 1991, 1992, 1993, 1996–2005, 2007, 2009–2023 Free Software Foundation, Inc. - This is Edition 5.2 of ‘GAWK: Effective AWK Programming: A User’s + This is Edition 5.3 of ‘GAWK: Effective AWK Programming: A User’s Guide for GNU Awk’, for the 5.3.0 (or later) version of the GNU implementation of AWK. @@ -41,7 +41,7 @@ particular records in a file and perform operations upon them. Free Software Foundation, Inc. - This is Edition 5.2 of ‘GAWK: Effective AWK Programming: A User’s + This is Edition 5.3 of ‘GAWK: Effective AWK Programming: A User’s Guide for GNU Awk’, for the 5.3.0 (or later) version of the GNU implementation of AWK. @@ -3415,6 +3415,10 @@ reducing the need for writing complex and tedious command lines. In particular, ‘@include’ is very useful for writing CGI scripts to be run from web pages. + The ‘@include’ directive and the ‘-i’/‘--include’ command line option +are completely equivalent. An included program source is not loaded if +it has been previously loaded. + The rules for finding a source file described in *note AWKPATH Variable:: also apply to files loaded with ‘@include’. @@ -12049,7 +12053,7 @@ figure-array-elements, conceptually, if the element values are eight, ‘"foo"’, ‘""’, and 30. - + | 8 | \"foo\" | \"\" | 30 | Value +---------+---------+--------+---------+ 0 1 2 3 Index" @@ -25863,7 +25867,7 @@ fields are function pointers. This is shown in *note Figure 17.1: figure-load-extension. - + Struct +---+ | | @@ -25897,7 +25901,7 @@ symbols. One of these function pointers is to a function for figure-register-new-function. - + +--------------------------------------------+ | | @@ -25920,7 +25924,7 @@ calling convention. This is shown in *note Figure 17.3: figure-call-new-function. 
- + chdir(\"/path\") (*fnptr)(1); } +--------------------------------------------+ @@ -26772,9 +26776,10 @@ File: gawk.info, Node: Input Parsers, Next: Output Wrappers, Prev: Extension ................................. By default, ‘gawk’ reads text files as its input. It uses the value of -‘RS’ to find the end of the record, and then uses ‘FS’ (or ‘FIELDWIDTHS’ -or ‘FPAT’) to split it into fields (*note Reading Files::). -Additionally, it sets the value of ‘RT’ (*note Built-in Variables::). +‘RS’ to find the end of an input record, and then uses ‘FS’ (or +‘FIELDWIDTHS’ or ‘FPAT’) to split it into fields (*note Reading +Files::). Additionally, it sets the value of ‘RT’ (*note Built-in +Variables::). If you want, you can provide your own custom input parser. An input parser’s job is to return a record to the ‘gawk’ record-processing code, @@ -26858,13 +26863,34 @@ as follows: The name of the file. ‘int fd;’ - A file descriptor for the file. If ‘gawk’ was able to open the - file, then ‘fd’ will _not_ be equal to ‘INVALID_HANDLE’. + A file descriptor for the file. ‘gawk’ attempts to open the file + for reading using the ‘open()’ system call. If it was able to open + the file, then ‘fd’ will _not_ be equal to ‘INVALID_HANDLE’. Otherwise, it will. + An extension can decide that it doesn’t want to use the open file + descriptor provided by ‘gawk’. In such a case it can close the + file and set ‘fd’ to ‘INVALID_HANDLE’, or it can leave it alone and + keep its own file descriptor in private data pointed to by the + ‘opaque’ pointer (see further in this list). In any case, if the + file descriptor is valid, it should _not_ just overwrite the value + with something else; doing so would cause a resource leak. + ‘struct stat sbuf;’ If the file descriptor is valid, then ‘gawk’ will have filled in - this structure via a call to the ‘fstat()’ system call. + this structure via a call to the ‘fstat()’ system call. 
Otherwise, + if the ‘lstat()’ system call is available, it will use that. If + ‘lstat()’ is not available, then it uses ‘stat()’. + + Getting the file’s information allows extensions to check the type + of the file even if it could not be opened. This occurs, for + example, on Windows systems when trying to use ‘open()’ on a + directory. + + If ‘gawk’ was not able to get the file information, then ‘sbuf’ + will be zeroed out. In particular, extension code can check if + ‘sbuf.st_mode == 0’. If that’s true, then there is no information + in ‘sbuf’. The ‘XXX_can_take_file()’ function should examine these fields and decide if the input parser should be used for the file. The decision @@ -26895,7 +26921,7 @@ may be filled by ‘XXX_take_control_of()’: input records. Said function is the core of the input parser. Its behavior is described in the text following this list. -‘ssize_t (*read_func)();’ +‘ssize_t (*read_func)(int, void *, size_t);’ This function pointer should point to a function that has the same behavior as the standard POSIX ‘read()’ system call. It is an alternative to the ‘get_record’ pointer. Its behavior is also @@ -26920,13 +26946,13 @@ records. The parameters are as follows: ‘char **out’ This is a pointer to a ‘char *’ variable that is set to point to - the record. ‘gawk’ makes its own copy of the data, so the + the record. ‘gawk’ makes its own copy of the data, so your extension must manage this storage. ‘struct awk_input *iobuf’ - This is the ‘awk_input_buf_t’ for the file. The fields should be - used for reading data (‘fd’) and for managing private state - (‘opaque’), if any. + This is the ‘awk_input_buf_t’ for the file. Two of its fields + should be used by your extension: ‘fd’ for reading data, and + ‘opaque’ for managing any private state. ‘int *errcode’ If an error occurs, ‘*errcode’ should be set to an appropriate code @@ -26937,8 +26963,8 @@ records. 
The parameters are as follows: If the concept of a “record terminator” makes sense, then ‘*rt_start’ should be set to point to the data to be used for ‘RT’, and ‘*rt_len’ should be set to the length of the data. Otherwise, - ‘*rt_len’ should be set to zero. ‘gawk’ makes its own copy of this - data, so the extension must manage this storage. + ‘*rt_len’ should be set to zero. Here too, ‘gawk’ makes its own + copy of this data, so your extension must manage this storage. ‘const awk_fieldwidth_info_t **field_width’ If ‘field_width’ is not ‘NULL’, then ‘*field_width’ will be @@ -26948,11 +26974,12 @@ records. The parameters are as follows: copied by ‘gawk’; it must persist at least until the next call to ‘get_record’ or ‘close_func’. Note also that ‘field_width’ is ‘NULL’ when ‘getline’ is assigning the results to a variable, thus - field parsing is not needed. If the parser does set - ‘*field_width’, then ‘gawk’ uses this layout to parse the input - record, and the ‘PROCINFO["FS"]’ value will be ‘"API"’ while this - record is active in ‘$0’. The ‘awk_fieldwidth_info_t’ data - structure is described below. + field parsing is not needed. + + If the parser sets ‘*field_width’, then ‘gawk’ uses this layout to + parse the input record, and the ‘PROCINFO["FS"]’ value will be + ‘"API"’ while this record is active in ‘$0’. The + ‘awk_fieldwidth_info_t’ data structure is described below. The return value is the length of the buffer pointed to by ‘*out’, or ‘EOF’ if end-of-file was reached or an error occurred. @@ -26996,12 +27023,7 @@ extension does). Or you may want it to take effect based upon the value of an ‘awk’ variable, as the XML extension from the ‘gawkextlib’ project does (*note gawkextlib::). In the latter case, code in a ‘BEGINFILE’ rule can look at ‘FILENAME’ and ‘ERRNO’ to decide whether or not to -activate an input parser (*note BEGINFILE/ENDFILE::). 
- - You register your input parser with the following function: - -‘void register_input_parser(awk_input_parser_t *input_parser);’ - Register the input parser pointed to by ‘input_parser’ with ‘gawk’. +activate your input parser (*note BEGINFILE/ENDFILE::). If you would like to override the default field parsing mechanism for a given record, then you must populate an ‘awk_fieldwidth_info_t’ @@ -27043,6 +27065,11 @@ can be used as an argument to ‘malloc()’ or in a union to allocate space statically. Please refer to the ‘readdir_test’ sample extension for an example. + You register your input parser with the following function: + +‘void register_input_parser(awk_input_parser_t *input_parser);’ + Register the input parser pointed to by ‘input_parser’ with ‘gawk’. + File: gawk.info, Node: Output Wrappers, Next: Two-way processors, Prev: Input Parsers, Up: Registration Functions @@ -27137,11 +27164,12 @@ in the ‘awk_output_buf_t’. The data members are as follows: The ‘XXX_can_take_file()’ function should make a decision based upon the ‘name’ and ‘mode’ fields, and any additional state (such as ‘awk’ -variable values) that is appropriate. +variable values) that is appropriate. ‘gawk’ attempts to open the named +file for writing. The ‘fp’ member will be ‘NULL’ only if it fails. When ‘gawk’ calls ‘XXX_take_control_of()’, that function should fill in the other fields as appropriate, except for ‘fp’, which it should -just use normally. +just use normally if it’s not ‘NULL’. You register your output wrapper with the following function: @@ -30677,6 +30705,8 @@ POSIX ‘awk’, in the order they were added to ‘gawk’. • Redirected ‘getline’ became allowed inside ‘BEGINFILE’ and ‘ENDFILE’ (*note BEGINFILE/ENDFILE::). + • Support for nonfatal I/O (*note Nonfatal::). + • The ‘where’ command was added to the debugger (*note Execution Stack::). @@ -33630,7 +33660,7 @@ At the most basic level, the job of a program is to process some input data and produce results. 
See *note Figure D.1: figure-general-flow. - + +------+ / \\ +---------+ | Data | -----> < Program > -----> | Results | +------+ \\_______/ +---------+" @@ -33646,7 +33676,7 @@ uses the instructions in your program to process the data. basic set of steps, as shown in *note Figure D.2: figure-process-flow.: - + +----------------+ / More \\ No +----------+ | Initialization | -------> < Data > -------> | Clean Up | +----------------+ ^ \\ ? / +----------+ @@ -38016,7 +38046,7 @@ Index * Kasal, Stepan: Acknowledgments. (line 60) * Kelly, Terence: Persistent Memory. (line 76) * Kelly, Terence <1>: Persistent Memory. (line 127) -* Kelly, Terence <2>: Feature History. (line 508) +* Kelly, Terence <2>: Feature History. (line 510) * Kenobi, Obi-Wan: Undocumented. (line 6) * Kernighan, Brian: History. (line 17) * Kernighan, Brian, quotes: Conventions. (line 38) @@ -39492,568 +39522,568 @@ Ref: AWKLIBPATH Variable-Footnote-1156639 Node: Other Environment Variables157036 Node: Exit Status161532 Node: Include Files162247 -Node: Loading Shared Libraries166118 -Node: Obsolete167610 -Node: Undocumented168246 -Node: Invoking Summary168545 -Node: Regexp171572 -Node: Regexp Usage173066 -Node: Escape Sequences175167 -Node: Regexp Operators182503 -Node: Regexp Operator Details182996 -Ref: Regexp Operator Details-Footnote-1190862 -Node: Interval Expressions191021 -Ref: Interval Expressions-Footnote-1193290 -Node: Bracket Expressions193390 -Ref: table-char-classes195950 -Node: Leftmost Longest199472 -Node: Computed Regexps200832 -Node: GNU Regexp Operators204355 -Node: Case-sensitivity208378 -Ref: Case-sensitivity-Footnote-1211335 -Ref: Case-sensitivity-Footnote-2211580 -Node: Regexp Summary211696 -Node: Reading Files213220 -Node: Records215537 -Node: awk split records216648 -Node: gawk split records221538 -Ref: gawk split records-Footnote-1226832 -Node: Fields226869 -Node: Nonconstant Fields229756 -Ref: Nonconstant Fields-Footnote-1232067 -Node: Changing Fields232283 -Node: Field 
Separators238591 -Node: Default Field Splitting241464 -Node: Regexp Field Splitting242607 -Node: Single Character Fields246436 -Node: Comma Separated Fields247525 -Ref: table-csv-examples248813 -Node: Command Line Field Separator249765 -Node: Full Line Fields253151 -Ref: Full Line Fields-Footnote-1254731 -Ref: Full Line Fields-Footnote-2254777 -Node: Field Splitting Summary254885 -Node: Constant Size257164 -Node: Fixed width data257908 -Node: Skipping intervening261427 -Node: Allowing trailing data262229 -Node: Fields with fixed data263294 -Node: Splitting By Content264920 -Ref: Splitting By Content-Footnote-1269105 -Node: More CSV269268 -Node: FS versus FPAT270921 -Node: Testing field creation272130 -Node: Multiple Line273823 -Node: Getline280305 -Node: Plain Getline282891 -Node: Getline/Variable285541 -Node: Getline/File286738 -Node: Getline/Variable/File288186 -Ref: Getline/Variable/File-Footnote-1289831 -Node: Getline/Pipe289927 -Node: Getline/Variable/Pipe292740 -Node: Getline/Coprocess293923 -Node: Getline/Variable/Coprocess295246 -Node: Getline Notes296012 -Node: Getline Summary298973 -Ref: table-getline-variants299417 -Node: Read Timeout300322 -Ref: Read Timeout-Footnote-1304286 -Node: Retrying Input304344 -Node: Command-line directories305611 -Node: Input Summary306549 -Node: Input Exercises309929 -Node: Printing310369 -Node: Print312312 -Node: Print Examples313818 -Node: Output Separators316671 -Node: OFMT318782 -Node: Printf320205 -Node: Basic Printf321010 -Node: Control Letters322646 -Node: Format Modifiers328115 -Node: Printf Examples334401 -Node: Redirection336946 -Node: Special FD344020 -Ref: Special FD-Footnote-1347310 -Node: Special Files347396 -Node: Other Inherited Files348025 -Node: Special Network349090 -Node: Special Caveats349978 -Node: Close Files And Pipes350961 -Ref: Close Files And Pipes-Footnote-1357097 -Node: Close Return Value357253 -Ref: table-close-pipe-return-values358528 -Ref: Close Return Value-Footnote-1359362 -Node: 
Noflush359518 -Node: Nonfatal360990 -Node: Output Summary363407 -Node: Output Exercises364693 -Node: Expressions365384 -Node: Values366586 -Node: Constants367264 -Node: Scalar Constants367961 -Ref: Scalar Constants-Footnote-1370536 -Node: Nondecimal-numbers370786 -Node: Regexp Constants373907 -Node: Using Constant Regexps374453 -Node: Standard Regexp Constants375099 -Node: Strong Regexp Constants378399 -Node: Variables382250 -Node: Using Variables382915 -Node: Assignment Options384895 -Node: Conversion387457 -Node: Strings And Numbers387989 -Ref: Strings And Numbers-Footnote-1391208 -Node: Locale influences conversions391317 -Ref: table-locale-affects394167 -Node: All Operators394810 -Node: Arithmetic Ops395451 -Node: Concatenation398281 -Ref: Concatenation-Footnote-1401231 -Node: Assignment Ops401354 -Ref: table-assign-ops406493 -Node: Increment Ops407875 -Node: Truth Values and Conditions411474 -Node: Truth Values412600 -Node: Typing and Comparison413691 -Node: Variable Typing414527 -Ref: Variable Typing-Footnote-1421189 -Ref: Variable Typing-Footnote-2421269 -Node: Comparison Operators421352 -Ref: table-relational-ops421779 -Node: POSIX String Comparison425465 -Ref: POSIX String Comparison-Footnote-1427224 -Ref: POSIX String Comparison-Footnote-2427367 -Node: Boolean Ops427451 -Ref: Boolean Ops-Footnote-1432144 -Node: Conditional Exp432240 -Node: Function Calls434026 -Node: Precedence437976 -Node: Locales441853 -Node: Expressions Summary443535 -Node: Patterns and Actions446198 -Node: Pattern Overview447340 -Node: Regexp Patterns449066 -Node: Expression Patterns449612 -Node: Ranges453521 -Node: BEGIN/END456699 -Node: Using BEGIN/END457512 -Ref: Using BEGIN/END-Footnote-1460422 -Node: I/O And BEGIN/END460532 -Node: BEGINFILE/ENDFILE463013 -Node: Empty466454 -Node: Using Shell Variables466771 -Node: Action Overview469109 -Node: Statements471544 -Node: If Statement473442 -Node: While Statement475011 -Node: Do Statement477099 -Node: For Statement478285 -Node: Switch 
Statement481642 -Node: Break Statement484193 -Node: Continue Statement486385 -Node: Next Statement488317 -Node: Nextfile Statement490814 -Node: Exit Statement493675 -Node: Built-in Variables496208 -Node: User-modified497387 -Node: Auto-set505598 -Ref: Auto-set-Footnote-1523697 -Ref: Auto-set-Footnote-2523915 -Node: ARGC and ARGV523971 -Node: Pattern Action Summary528410 -Node: Arrays531026 -Node: Array Basics532403 -Node: Array Intro533253 -Ref: figure-array-elements535269 -Ref: Array Intro-Footnote-1538133 -Node: Reference to Elements538265 -Node: Assigning Elements540787 -Node: Array Example541282 -Node: Scanning an Array543251 -Node: Controlling Scanning546348 -Ref: Controlling Scanning-Footnote-1553011 -Node: Numeric Array Subscripts553335 -Node: Uninitialized Subscripts555609 -Node: Delete557288 -Ref: Delete-Footnote-1560102 -Node: Multidimensional560159 -Node: Multiscanning563364 -Node: Arrays of Arrays565036 -Node: Arrays Summary569936 -Node: Functions572125 -Node: Built-in573185 -Node: Calling Built-in574374 -Node: Boolean Functions576421 -Node: Numeric Functions576991 -Ref: Numeric Functions-Footnote-1581184 -Ref: Numeric Functions-Footnote-2581868 -Ref: Numeric Functions-Footnote-3581920 -Node: String Functions582196 -Ref: String Functions-Footnote-1608332 -Ref: String Functions-Footnote-2608466 -Ref: String Functions-Footnote-3608726 -Node: Gory Details608813 -Ref: table-sub-escapes610720 -Ref: table-sub-proposed612366 -Ref: table-posix-sub613876 -Ref: table-gensub-escapes615564 -Ref: Gory Details-Footnote-1616498 -Node: I/O Functions616652 -Ref: table-system-return-values623339 -Ref: I/O Functions-Footnote-1625510 -Ref: I/O Functions-Footnote-2625658 -Node: Time Functions625778 -Ref: Time Functions-Footnote-1636934 -Ref: Time Functions-Footnote-2637010 -Ref: Time Functions-Footnote-3637172 -Ref: Time Functions-Footnote-4637283 -Ref: Time Functions-Footnote-5637401 -Ref: Time Functions-Footnote-6637636 -Node: Bitwise Functions637918 -Ref: 
table-bitwise-ops638520 -Ref: Bitwise Functions-Footnote-1644774 -Ref: Bitwise Functions-Footnote-2644953 -Node: Type Functions645150 -Node: I18N Functions648743 -Node: User-defined650486 -Node: Definition Syntax651306 -Ref: Definition Syntax-Footnote-1657134 -Node: Function Example657211 -Ref: Function Example-Footnote-1660190 -Node: Function Calling660212 -Node: Calling A Function660806 -Node: Variable Scope661776 -Node: Pass By Value/Reference664830 -Node: Function Caveats667562 -Ref: Function Caveats-Footnote-1669657 -Node: Return Statement669781 -Node: Dynamic Typing672836 -Node: Indirect Calls673792 -Node: Functions Summary684951 -Node: Library Functions687728 -Ref: Library Functions-Footnote-1691276 -Ref: Library Functions-Footnote-2691419 -Node: Library Names691594 -Ref: Library Names-Footnote-1695388 -Ref: Library Names-Footnote-2695615 -Node: General Functions695711 -Node: Strtonum Function696905 -Node: Assert Function699987 -Node: Round Function703439 -Node: Cliff Random Function705017 -Node: Ordinal Functions706050 -Ref: Ordinal Functions-Footnote-1709159 -Ref: Ordinal Functions-Footnote-2709411 -Node: Join Function709625 -Ref: Join Function-Footnote-1711428 -Node: Getlocaltime Function711632 -Node: Readfile Function715406 -Node: Shell Quoting717435 -Node: Isnumeric Function718891 -Node: Data File Management720303 -Node: Filetrans Function720935 -Node: Rewind Function725229 -Node: File Checking727208 -Ref: File Checking-Footnote-1728580 -Node: Empty Files728787 -Node: Ignoring Assigns730854 -Node: Getopt Function732428 -Ref: Getopt Function-Footnote-1748262 -Node: Passwd Functions748474 -Ref: Passwd Functions-Footnote-1757656 -Node: Group Functions757744 -Ref: Group Functions-Footnote-1765882 -Node: Walking Arrays766095 -Node: Library Functions Summary769143 -Node: Library Exercises770567 -Node: Sample Programs771054 -Node: Running Examples771836 -Node: Clones772588 -Node: Cut Program773860 -Node: Egrep Program784301 -Node: Id Program793618 -Node: Split 
Program803732 -Ref: Split Program-Footnote-1813967 -Node: Tee Program814154 -Node: Uniq Program817063 -Node: Wc Program824928 -Node: Bytes vs. Characters825323 -Node: Using extensions826925 -Node: wc program827705 -Node: Miscellaneous Programs832711 -Node: Dupword Program833940 -Node: Alarm Program836003 -Node: Translate Program840916 -Ref: Translate Program-Footnote-1845657 -Node: Labels Program845935 -Ref: Labels Program-Footnote-1849376 -Node: Word Sorting849468 -Node: History Sorting853662 -Node: Extract Program855937 -Node: Simple Sed864206 -Node: Igawk Program867422 -Ref: Igawk Program-Footnote-1882669 -Ref: Igawk Program-Footnote-2882875 -Ref: Igawk Program-Footnote-3883005 -Node: Anagram Program883132 -Node: Signature Program886228 -Node: Programs Summary887480 -Node: Programs Exercises888738 -Ref: Programs Exercises-Footnote-1893054 -Node: Advanced Features893140 -Node: Nondecimal Data895634 -Node: Boolean Typed Values897264 -Node: Array Sorting899239 -Node: Controlling Array Traversal899968 -Ref: Controlling Array Traversal-Footnote-1908475 -Node: Array Sorting Functions908597 -Ref: Array Sorting Functions-Footnote-1914716 -Node: Two-way I/O914924 -Ref: Two-way I/O-Footnote-1922919 -Ref: Two-way I/O-Footnote-2923110 -Node: TCP/IP Networking923192 -Node: Profiling926372 -Node: Persistent Memory936082 -Ref: Persistent Memory-Footnote-1945040 -Node: Extension Philosophy945171 -Node: Advanced Features Summary946706 -Node: Internationalization948976 -Node: I18N and L10N950682 -Node: Explaining gettext951377 -Ref: Explaining gettext-Footnote-1957530 -Ref: Explaining gettext-Footnote-2957725 -Node: Programmer i18n957890 -Ref: Programmer i18n-Footnote-1963003 -Node: Translator i18n963052 -Node: String Extraction963888 -Ref: String Extraction-Footnote-1965066 -Node: Printf Ordering965164 -Ref: Printf Ordering-Footnote-1968026 -Node: I18N Portability968094 -Ref: I18N Portability-Footnote-1970668 -Node: I18N Example970739 -Ref: I18N Example-Footnote-1974139 -Ref: 
I18N Example-Footnote-2974215 -Node: Gawk I18N974332 -Node: I18N Summary974988 -Node: Debugger976389 -Node: Debugging977413 -Node: Debugging Concepts977862 -Node: Debugging Terms979688 -Node: Awk Debugging982301 -Ref: Awk Debugging-Footnote-1983278 -Node: Sample Debugging Session983418 -Node: Debugger Invocation983970 -Node: Finding The Bug985599 -Node: List of Debugger Commands992285 -Node: Breakpoint Control993662 -Node: Debugger Execution Control997494 -Node: Viewing And Changing Data1000974 -Node: Execution Stack1004712 -Node: Debugger Info1006393 -Node: Miscellaneous Debugger Commands1010692 -Node: Readline Support1015945 -Node: Limitations1016891 -Node: Debugging Summary1019535 -Node: Namespaces1020838 -Node: Global Namespace1021965 -Node: Qualified Names1023410 -Node: Default Namespace1024445 -Node: Changing The Namespace1025220 -Node: Naming Rules1026914 -Node: Internal Name Management1028829 -Node: Namespace Example1029899 -Node: Namespace And Features1032482 -Node: Namespace Summary1033939 -Node: Arbitrary Precision Arithmetic1035452 -Node: Computer Arithmetic1036971 -Ref: table-numeric-ranges1040788 -Ref: table-floating-point-ranges1041286 -Ref: Computer Arithmetic-Footnote-11041945 -Node: Math Definitions1042004 -Ref: table-ieee-formats1045049 -Node: MPFR features1045623 -Node: MPFR On Parole1046076 -Ref: MPFR On Parole-Footnote-11046920 -Node: MPFR Intro1047079 -Node: FP Math Caution1048769 -Ref: FP Math Caution-Footnote-11049843 -Node: Inexactness of computations1050220 -Node: Inexact representation1051251 -Node: Comparing FP Values1052634 -Node: Errors accumulate1053892 -Node: Strange values1055359 -Ref: Strange values-Footnote-11058025 -Node: Getting Accuracy1058130 -Node: Try To Round1060867 -Node: Setting precision1061774 -Ref: table-predefined-precision-strings1062479 -Node: Setting the rounding mode1064364 -Ref: table-gawk-rounding-modes1064746 -Ref: Setting the rounding mode-Footnote-11068804 -Node: Arbitrary Precision Integers1068987 -Ref: 
Arbitrary Precision Integers-Footnote-11072199 -Node: Checking for MPFR1072355 -Node: POSIX Floating Point Problems1073845 -Ref: POSIX Floating Point Problems-Footnote-11078709 -Node: Floating point summary1078747 -Node: Dynamic Extensions1081011 -Node: Extension Intro1082610 -Node: Plugin License1083918 -Node: Extension Mechanism Outline1084731 -Ref: figure-load-extension1085182 -Ref: figure-register-new-function1086762 -Ref: figure-call-new-function1087867 -Node: Extension API Description1089986 -Node: Extension API Functions Introduction1091715 -Ref: table-api-std-headers1093613 -Node: General Data Types1098077 -Ref: General Data Types-Footnote-11107245 -Node: Memory Allocation Functions1107560 -Ref: Memory Allocation Functions-Footnote-11112285 -Node: Constructor Functions1112384 -Node: API Ownership of MPFR and GMP Values1116289 -Node: Registration Functions1117850 -Node: Extension Functions1118554 -Node: Exit Callback Functions1124130 -Node: Extension Version String1125449 -Node: Input Parsers1126144 -Node: Output Wrappers1139518 -Node: Two-way processors1144226 -Node: Printing Messages1146587 -Ref: Printing Messages-Footnote-11147801 -Node: Updating ERRNO1147956 -Node: Requesting Values1148755 -Ref: table-value-types-returned1149508 -Node: Accessing Parameters1150617 -Node: Symbol Table Access1151901 -Node: Symbol table by name1152417 -Ref: Symbol table by name-Footnote-11155628 -Node: Symbol table by cookie1155760 -Ref: Symbol table by cookie-Footnote-11160041 -Node: Cached values1160105 -Ref: Cached values-Footnote-11163749 -Node: Array Manipulation1163906 -Ref: Array Manipulation-Footnote-11165009 -Node: Array Data Types1165046 -Ref: Array Data Types-Footnote-11167868 -Node: Array Functions1167968 -Node: Flattening Arrays1172997 -Node: Creating Arrays1180049 -Node: Redirection API1184899 -Node: Extension API Variables1187920 -Node: Extension Versioning1188645 -Ref: gawk-api-version1189082 -Node: Extension GMP/MPFR Versioning1190870 -Node: Extension API 
Informational Variables1192576 -Node: Extension API Boilerplate1193737 -Node: Changes from API V11197873 -Node: Finding Extensions1199507 -Node: Extension Example1200082 -Node: Internal File Description1200906 -Node: Internal File Ops1205230 -Ref: Internal File Ops-Footnote-11216788 -Node: Using Internal File Ops1216936 -Ref: Using Internal File Ops-Footnote-11219367 -Node: Extension Samples1219645 -Node: Extension Sample File Functions1221214 -Node: Extension Sample Fnmatch1229352 -Node: Extension Sample Fork1230947 -Node: Extension Sample Inplace1232223 -Node: Extension Sample Ord1235895 -Node: Extension Sample Readdir1236771 -Ref: table-readdir-file-types1237668 -Node: Extension Sample Revout1238806 -Node: Extension Sample Rev2way1239403 -Node: Extension Sample Read write array1240155 -Node: Extension Sample Readfile1243429 -Node: Extension Sample Time1244560 -Node: Extension Sample API Tests1246850 -Node: gawkextlib1247358 -Node: Extension summary1250394 -Node: Extension Exercises1254252 -Node: Language History1255530 -Node: V7/SVR3.11257244 -Node: SVR41259594 -Node: POSIX1261126 -Node: BTL1262551 -Node: POSIX/GNU1263320 -Node: Feature History1269851 -Node: Common Extensions1289364 -Node: Ranges and Locales1290733 -Ref: Ranges and Locales-Footnote-11295534 -Ref: Ranges and Locales-Footnote-21295561 -Ref: Ranges and Locales-Footnote-31295800 -Node: Contributors1296023 -Node: History summary1302228 -Node: Installation1303674 -Node: Gawk Distribution1304638 -Node: Getting1305130 -Node: Extracting1306129 -Node: Distribution contents1307841 -Node: Unix Installation1315921 -Node: Quick Installation1316743 -Node: Compiling with MPFR1319289 -Node: Shell Startup Files1319995 -Node: Additional Configuration Options1321152 -Node: Configuration Philosophy1323539 -Node: Compiling from Git1326041 -Node: Building the Documentation1326600 -Node: Non-Unix Installation1328012 -Node: PC Installation1328488 -Node: PC Binary Installation1329361 -Node: PC Compiling1330266 -Node: PC 
Using1331444 -Node: Cygwin1335172 -Node: MSYS1336428 -Node: OpenVMS Installation1337060 -Node: OpenVMS Compilation1337741 -Ref: OpenVMS Compilation-Footnote-11339224 -Node: OpenVMS Dynamic Extensions1339286 -Node: OpenVMS Installation Details1340922 -Node: OpenVMS Running1343357 -Node: OpenVMS GNV1347494 -Node: Bugs1348249 -Node: Bug definition1349173 -Node: Bug address1352824 -Node: Usenet1356415 -Node: Performance bugs1357646 -Node: Asking for help1360664 -Node: Maintainers1362655 -Node: Other Versions1363682 -Node: Installation summary1372614 -Node: Notes1373998 -Node: Compatibility Mode1374808 -Node: Additions1375630 -Node: Accessing The Source1376575 -Node: Adding Code1378110 -Node: New Ports1385246 -Node: Derived Files1389756 -Ref: Derived Files-Footnote-11395603 -Ref: Derived Files-Footnote-21395638 -Ref: Derived Files-Footnote-31396255 -Node: Future Extensions1396369 -Node: Implementation Limitations1397041 -Node: Extension Design1398283 -Node: Old Extension Problems1399447 -Ref: Old Extension Problems-Footnote-11401023 -Node: Extension New Mechanism Goals1401084 -Ref: Extension New Mechanism Goals-Footnote-11404580 -Node: Extension Other Design Decisions1404781 -Node: Extension Future Growth1406980 -Node: Notes summary1407604 -Node: Basic Concepts1408817 -Node: Basic High Level1409502 -Ref: figure-general-flow1409784 -Ref: figure-process-flow1410486 -Ref: Basic High Level-Footnote-11413882 -Node: Basic Data Typing1414071 -Node: Glossary1417489 -Node: Copying1450611 -Node: GNU Free Documentation License1488372 -Node: Index1513695 +Node: Loading Shared Libraries166307 +Node: Obsolete167799 +Node: Undocumented168435 +Node: Invoking Summary168734 +Node: Regexp171761 +Node: Regexp Usage173255 +Node: Escape Sequences175356 +Node: Regexp Operators182692 +Node: Regexp Operator Details183185 +Ref: Regexp Operator Details-Footnote-1191051 +Node: Interval Expressions191210 +Ref: Interval Expressions-Footnote-1193479 +Node: Bracket Expressions193579 +Ref: 
table-char-classes196139 +Node: Leftmost Longest199661 +Node: Computed Regexps201021 +Node: GNU Regexp Operators204544 +Node: Case-sensitivity208567 +Ref: Case-sensitivity-Footnote-1211524 +Ref: Case-sensitivity-Footnote-2211769 +Node: Regexp Summary211885 +Node: Reading Files213409 +Node: Records215726 +Node: awk split records216837 +Node: gawk split records221727 +Ref: gawk split records-Footnote-1227021 +Node: Fields227058 +Node: Nonconstant Fields229945 +Ref: Nonconstant Fields-Footnote-1232256 +Node: Changing Fields232472 +Node: Field Separators238780 +Node: Default Field Splitting241653 +Node: Regexp Field Splitting242796 +Node: Single Character Fields246625 +Node: Comma Separated Fields247714 +Ref: table-csv-examples249002 +Node: Command Line Field Separator249954 +Node: Full Line Fields253340 +Ref: Full Line Fields-Footnote-1254920 +Ref: Full Line Fields-Footnote-2254966 +Node: Field Splitting Summary255074 +Node: Constant Size257353 +Node: Fixed width data258097 +Node: Skipping intervening261616 +Node: Allowing trailing data262418 +Node: Fields with fixed data263483 +Node: Splitting By Content265109 +Ref: Splitting By Content-Footnote-1269294 +Node: More CSV269457 +Node: FS versus FPAT271110 +Node: Testing field creation272319 +Node: Multiple Line274012 +Node: Getline280494 +Node: Plain Getline283080 +Node: Getline/Variable285730 +Node: Getline/File286927 +Node: Getline/Variable/File288375 +Ref: Getline/Variable/File-Footnote-1290020 +Node: Getline/Pipe290116 +Node: Getline/Variable/Pipe292929 +Node: Getline/Coprocess294112 +Node: Getline/Variable/Coprocess295435 +Node: Getline Notes296201 +Node: Getline Summary299162 +Ref: table-getline-variants299606 +Node: Read Timeout300511 +Ref: Read Timeout-Footnote-1304475 +Node: Retrying Input304533 +Node: Command-line directories305800 +Node: Input Summary306738 +Node: Input Exercises310118 +Node: Printing310558 +Node: Print312501 +Node: Print Examples314007 +Node: Output Separators316860 +Node: OFMT318971 +Node: 
Printf320394 +Node: Basic Printf321199 +Node: Control Letters322835 +Node: Format Modifiers328304 +Node: Printf Examples334590 +Node: Redirection337135 +Node: Special FD344209 +Ref: Special FD-Footnote-1347499 +Node: Special Files347585 +Node: Other Inherited Files348214 +Node: Special Network349279 +Node: Special Caveats350167 +Node: Close Files And Pipes351150 +Ref: Close Files And Pipes-Footnote-1357286 +Node: Close Return Value357442 +Ref: table-close-pipe-return-values358717 +Ref: Close Return Value-Footnote-1359551 +Node: Noflush359707 +Node: Nonfatal361179 +Node: Output Summary363596 +Node: Output Exercises364882 +Node: Expressions365573 +Node: Values366775 +Node: Constants367453 +Node: Scalar Constants368150 +Ref: Scalar Constants-Footnote-1370725 +Node: Nondecimal-numbers370975 +Node: Regexp Constants374096 +Node: Using Constant Regexps374642 +Node: Standard Regexp Constants375288 +Node: Strong Regexp Constants378588 +Node: Variables382439 +Node: Using Variables383104 +Node: Assignment Options385084 +Node: Conversion387646 +Node: Strings And Numbers388178 +Ref: Strings And Numbers-Footnote-1391397 +Node: Locale influences conversions391506 +Ref: table-locale-affects394356 +Node: All Operators394999 +Node: Arithmetic Ops395640 +Node: Concatenation398470 +Ref: Concatenation-Footnote-1401420 +Node: Assignment Ops401543 +Ref: table-assign-ops406682 +Node: Increment Ops408064 +Node: Truth Values and Conditions411663 +Node: Truth Values412789 +Node: Typing and Comparison413880 +Node: Variable Typing414716 +Ref: Variable Typing-Footnote-1421378 +Ref: Variable Typing-Footnote-2421458 +Node: Comparison Operators421541 +Ref: table-relational-ops421968 +Node: POSIX String Comparison425654 +Ref: POSIX String Comparison-Footnote-1427413 +Ref: POSIX String Comparison-Footnote-2427556 +Node: Boolean Ops427640 +Ref: Boolean Ops-Footnote-1432333 +Node: Conditional Exp432429 +Node: Function Calls434215 +Node: Precedence438165 +Node: Locales442042 +Node: Expressions 
Summary443724 +Node: Patterns and Actions446387 +Node: Pattern Overview447529 +Node: Regexp Patterns449255 +Node: Expression Patterns449801 +Node: Ranges453710 +Node: BEGIN/END456888 +Node: Using BEGIN/END457701 +Ref: Using BEGIN/END-Footnote-1460611 +Node: I/O And BEGIN/END460721 +Node: BEGINFILE/ENDFILE463202 +Node: Empty466643 +Node: Using Shell Variables466960 +Node: Action Overview469298 +Node: Statements471733 +Node: If Statement473631 +Node: While Statement475200 +Node: Do Statement477288 +Node: For Statement478474 +Node: Switch Statement481831 +Node: Break Statement484382 +Node: Continue Statement486574 +Node: Next Statement488506 +Node: Nextfile Statement491003 +Node: Exit Statement493864 +Node: Built-in Variables496397 +Node: User-modified497576 +Node: Auto-set505787 +Ref: Auto-set-Footnote-1523886 +Ref: Auto-set-Footnote-2524104 +Node: ARGC and ARGV524160 +Node: Pattern Action Summary528599 +Node: Arrays531215 +Node: Array Basics532592 +Node: Array Intro533442 +Ref: figure-array-elements535458 +Ref: Array Intro-Footnote-1538327 +Node: Reference to Elements538459 +Node: Assigning Elements540981 +Node: Array Example541476 +Node: Scanning an Array543445 +Node: Controlling Scanning546542 +Ref: Controlling Scanning-Footnote-1553205 +Node: Numeric Array Subscripts553529 +Node: Uninitialized Subscripts555803 +Node: Delete557482 +Ref: Delete-Footnote-1560296 +Node: Multidimensional560353 +Node: Multiscanning563558 +Node: Arrays of Arrays565230 +Node: Arrays Summary570130 +Node: Functions572319 +Node: Built-in573379 +Node: Calling Built-in574568 +Node: Boolean Functions576615 +Node: Numeric Functions577185 +Ref: Numeric Functions-Footnote-1581378 +Ref: Numeric Functions-Footnote-2582062 +Ref: Numeric Functions-Footnote-3582114 +Node: String Functions582390 +Ref: String Functions-Footnote-1608526 +Ref: String Functions-Footnote-2608660 +Ref: String Functions-Footnote-3608920 +Node: Gory Details609007 +Ref: table-sub-escapes610914 +Ref: table-sub-proposed612560 
+Ref: table-posix-sub614070 +Ref: table-gensub-escapes615758 +Ref: Gory Details-Footnote-1616692 +Node: I/O Functions616846 +Ref: table-system-return-values623533 +Ref: I/O Functions-Footnote-1625704 +Ref: I/O Functions-Footnote-2625852 +Node: Time Functions625972 +Ref: Time Functions-Footnote-1637128 +Ref: Time Functions-Footnote-2637204 +Ref: Time Functions-Footnote-3637366 +Ref: Time Functions-Footnote-4637477 +Ref: Time Functions-Footnote-5637595 +Ref: Time Functions-Footnote-6637830 +Node: Bitwise Functions638112 +Ref: table-bitwise-ops638714 +Ref: Bitwise Functions-Footnote-1644968 +Ref: Bitwise Functions-Footnote-2645147 +Node: Type Functions645344 +Node: I18N Functions648937 +Node: User-defined650680 +Node: Definition Syntax651500 +Ref: Definition Syntax-Footnote-1657328 +Node: Function Example657405 +Ref: Function Example-Footnote-1660384 +Node: Function Calling660406 +Node: Calling A Function661000 +Node: Variable Scope661970 +Node: Pass By Value/Reference665024 +Node: Function Caveats667756 +Ref: Function Caveats-Footnote-1669851 +Node: Return Statement669975 +Node: Dynamic Typing673030 +Node: Indirect Calls673986 +Node: Functions Summary685145 +Node: Library Functions687922 +Ref: Library Functions-Footnote-1691470 +Ref: Library Functions-Footnote-2691613 +Node: Library Names691788 +Ref: Library Names-Footnote-1695582 +Ref: Library Names-Footnote-2695809 +Node: General Functions695905 +Node: Strtonum Function697099 +Node: Assert Function700181 +Node: Round Function703633 +Node: Cliff Random Function705211 +Node: Ordinal Functions706244 +Ref: Ordinal Functions-Footnote-1709353 +Ref: Ordinal Functions-Footnote-2709605 +Node: Join Function709819 +Ref: Join Function-Footnote-1711622 +Node: Getlocaltime Function711826 +Node: Readfile Function715600 +Node: Shell Quoting717629 +Node: Isnumeric Function719085 +Node: Data File Management720497 +Node: Filetrans Function721129 +Node: Rewind Function725423 +Node: File Checking727402 +Ref: File 
Checking-Footnote-1728774 +Node: Empty Files728981 +Node: Ignoring Assigns731048 +Node: Getopt Function732622 +Ref: Getopt Function-Footnote-1748456 +Node: Passwd Functions748668 +Ref: Passwd Functions-Footnote-1757850 +Node: Group Functions757938 +Ref: Group Functions-Footnote-1766076 +Node: Walking Arrays766289 +Node: Library Functions Summary769337 +Node: Library Exercises770761 +Node: Sample Programs771248 +Node: Running Examples772030 +Node: Clones772782 +Node: Cut Program774054 +Node: Egrep Program784495 +Node: Id Program793812 +Node: Split Program803926 +Ref: Split Program-Footnote-1814161 +Node: Tee Program814348 +Node: Uniq Program817257 +Node: Wc Program825122 +Node: Bytes vs. Characters825517 +Node: Using extensions827119 +Node: wc program827899 +Node: Miscellaneous Programs832905 +Node: Dupword Program834134 +Node: Alarm Program836197 +Node: Translate Program841110 +Ref: Translate Program-Footnote-1845851 +Node: Labels Program846129 +Ref: Labels Program-Footnote-1849570 +Node: Word Sorting849662 +Node: History Sorting853856 +Node: Extract Program856131 +Node: Simple Sed864400 +Node: Igawk Program867616 +Ref: Igawk Program-Footnote-1882863 +Ref: Igawk Program-Footnote-2883069 +Ref: Igawk Program-Footnote-3883199 +Node: Anagram Program883326 +Node: Signature Program886422 +Node: Programs Summary887674 +Node: Programs Exercises888932 +Ref: Programs Exercises-Footnote-1893248 +Node: Advanced Features893334 +Node: Nondecimal Data895828 +Node: Boolean Typed Values897458 +Node: Array Sorting899433 +Node: Controlling Array Traversal900162 +Ref: Controlling Array Traversal-Footnote-1908669 +Node: Array Sorting Functions908791 +Ref: Array Sorting Functions-Footnote-1914910 +Node: Two-way I/O915118 +Ref: Two-way I/O-Footnote-1923113 +Ref: Two-way I/O-Footnote-2923304 +Node: TCP/IP Networking923386 +Node: Profiling926566 +Node: Persistent Memory936276 +Ref: Persistent Memory-Footnote-1945234 +Node: Extension Philosophy945365 +Node: Advanced Features Summary946900 
+Node: Internationalization949170 +Node: I18N and L10N950876 +Node: Explaining gettext951571 +Ref: Explaining gettext-Footnote-1957724 +Ref: Explaining gettext-Footnote-2957919 +Node: Programmer i18n958084 +Ref: Programmer i18n-Footnote-1963197 +Node: Translator i18n963246 +Node: String Extraction964082 +Ref: String Extraction-Footnote-1965260 +Node: Printf Ordering965358 +Ref: Printf Ordering-Footnote-1968220 +Node: I18N Portability968288 +Ref: I18N Portability-Footnote-1970862 +Node: I18N Example970933 +Ref: I18N Example-Footnote-1974333 +Ref: I18N Example-Footnote-2974409 +Node: Gawk I18N974526 +Node: I18N Summary975182 +Node: Debugger976583 +Node: Debugging977607 +Node: Debugging Concepts978056 +Node: Debugging Terms979882 +Node: Awk Debugging982495 +Ref: Awk Debugging-Footnote-1983472 +Node: Sample Debugging Session983612 +Node: Debugger Invocation984164 +Node: Finding The Bug985793 +Node: List of Debugger Commands992479 +Node: Breakpoint Control993856 +Node: Debugger Execution Control997688 +Node: Viewing And Changing Data1001168 +Node: Execution Stack1004906 +Node: Debugger Info1006587 +Node: Miscellaneous Debugger Commands1010886 +Node: Readline Support1016139 +Node: Limitations1017085 +Node: Debugging Summary1019729 +Node: Namespaces1021032 +Node: Global Namespace1022159 +Node: Qualified Names1023604 +Node: Default Namespace1024639 +Node: Changing The Namespace1025414 +Node: Naming Rules1027108 +Node: Internal Name Management1029023 +Node: Namespace Example1030093 +Node: Namespace And Features1032676 +Node: Namespace Summary1034133 +Node: Arbitrary Precision Arithmetic1035646 +Node: Computer Arithmetic1037165 +Ref: table-numeric-ranges1040982 +Ref: table-floating-point-ranges1041480 +Ref: Computer Arithmetic-Footnote-11042139 +Node: Math Definitions1042198 +Ref: table-ieee-formats1045243 +Node: MPFR features1045817 +Node: MPFR On Parole1046270 +Ref: MPFR On Parole-Footnote-11047114 +Node: MPFR Intro1047273 +Node: FP Math Caution1048963 +Ref: FP Math 
Caution-Footnote-11050037 +Node: Inexactness of computations1050414 +Node: Inexact representation1051445 +Node: Comparing FP Values1052828 +Node: Errors accumulate1054086 +Node: Strange values1055553 +Ref: Strange values-Footnote-11058219 +Node: Getting Accuracy1058324 +Node: Try To Round1061061 +Node: Setting precision1061968 +Ref: table-predefined-precision-strings1062673 +Node: Setting the rounding mode1064558 +Ref: table-gawk-rounding-modes1064940 +Ref: Setting the rounding mode-Footnote-11068998 +Node: Arbitrary Precision Integers1069181 +Ref: Arbitrary Precision Integers-Footnote-11072393 +Node: Checking for MPFR1072549 +Node: POSIX Floating Point Problems1074039 +Ref: POSIX Floating Point Problems-Footnote-11078903 +Node: Floating point summary1078941 +Node: Dynamic Extensions1081205 +Node: Extension Intro1082804 +Node: Plugin License1084112 +Node: Extension Mechanism Outline1084925 +Ref: figure-load-extension1085376 +Ref: figure-register-new-function1086961 +Ref: figure-call-new-function1088071 +Node: Extension API Description1090195 +Node: Extension API Functions Introduction1091924 +Ref: table-api-std-headers1093822 +Node: General Data Types1098286 +Ref: General Data Types-Footnote-11107454 +Node: Memory Allocation Functions1107769 +Ref: Memory Allocation Functions-Footnote-11112494 +Node: Constructor Functions1112593 +Node: API Ownership of MPFR and GMP Values1116498 +Node: Registration Functions1118059 +Node: Extension Functions1118763 +Node: Exit Callback Functions1124339 +Node: Extension Version String1125658 +Node: Input Parsers1126353 +Node: Output Wrappers1140997 +Node: Two-way processors1145845 +Node: Printing Messages1148206 +Ref: Printing Messages-Footnote-11149420 +Node: Updating ERRNO1149575 +Node: Requesting Values1150374 +Ref: table-value-types-returned1151127 +Node: Accessing Parameters1152236 +Node: Symbol Table Access1153520 +Node: Symbol table by name1154036 +Ref: Symbol table by name-Footnote-11157247 +Node: Symbol table by 
cookie1157379 +Ref: Symbol table by cookie-Footnote-11161660 +Node: Cached values1161724 +Ref: Cached values-Footnote-11165368 +Node: Array Manipulation1165525 +Ref: Array Manipulation-Footnote-11166628 +Node: Array Data Types1166665 +Ref: Array Data Types-Footnote-11169487 +Node: Array Functions1169587 +Node: Flattening Arrays1174616 +Node: Creating Arrays1181668 +Node: Redirection API1186518 +Node: Extension API Variables1189539 +Node: Extension Versioning1190264 +Ref: gawk-api-version1190701 +Node: Extension GMP/MPFR Versioning1192489 +Node: Extension API Informational Variables1194195 +Node: Extension API Boilerplate1195356 +Node: Changes from API V11199492 +Node: Finding Extensions1201126 +Node: Extension Example1201701 +Node: Internal File Description1202525 +Node: Internal File Ops1206849 +Ref: Internal File Ops-Footnote-11218407 +Node: Using Internal File Ops1218555 +Ref: Using Internal File Ops-Footnote-11220986 +Node: Extension Samples1221264 +Node: Extension Sample File Functions1222833 +Node: Extension Sample Fnmatch1230971 +Node: Extension Sample Fork1232566 +Node: Extension Sample Inplace1233842 +Node: Extension Sample Ord1237514 +Node: Extension Sample Readdir1238390 +Ref: table-readdir-file-types1239287 +Node: Extension Sample Revout1240425 +Node: Extension Sample Rev2way1241022 +Node: Extension Sample Read write array1241774 +Node: Extension Sample Readfile1245048 +Node: Extension Sample Time1246179 +Node: Extension Sample API Tests1248469 +Node: gawkextlib1248977 +Node: Extension summary1252013 +Node: Extension Exercises1255871 +Node: Language History1257149 +Node: V7/SVR3.11258863 +Node: SVR41261213 +Node: POSIX1262745 +Node: BTL1264170 +Node: POSIX/GNU1264939 +Node: Feature History1271470 +Node: Common Extensions1291036 +Node: Ranges and Locales1292405 +Ref: Ranges and Locales-Footnote-11297206 +Ref: Ranges and Locales-Footnote-21297233 +Ref: Ranges and Locales-Footnote-31297472 +Node: Contributors1297695 +Node: History summary1303900 +Node: 
Installation1305346 +Node: Gawk Distribution1306310 +Node: Getting1306802 +Node: Extracting1307801 +Node: Distribution contents1309513 +Node: Unix Installation1317593 +Node: Quick Installation1318415 +Node: Compiling with MPFR1320961 +Node: Shell Startup Files1321667 +Node: Additional Configuration Options1322824 +Node: Configuration Philosophy1325211 +Node: Compiling from Git1327713 +Node: Building the Documentation1328272 +Node: Non-Unix Installation1329684 +Node: PC Installation1330160 +Node: PC Binary Installation1331033 +Node: PC Compiling1331938 +Node: PC Using1333116 +Node: Cygwin1336844 +Node: MSYS1338100 +Node: OpenVMS Installation1338732 +Node: OpenVMS Compilation1339413 +Ref: OpenVMS Compilation-Footnote-11340896 +Node: OpenVMS Dynamic Extensions1340958 +Node: OpenVMS Installation Details1342594 +Node: OpenVMS Running1345029 +Node: OpenVMS GNV1349166 +Node: Bugs1349921 +Node: Bug definition1350845 +Node: Bug address1354496 +Node: Usenet1358087 +Node: Performance bugs1359318 +Node: Asking for help1362336 +Node: Maintainers1364327 +Node: Other Versions1365354 +Node: Installation summary1374286 +Node: Notes1375670 +Node: Compatibility Mode1376480 +Node: Additions1377302 +Node: Accessing The Source1378247 +Node: Adding Code1379782 +Node: New Ports1386918 +Node: Derived Files1391428 +Ref: Derived Files-Footnote-11397275 +Ref: Derived Files-Footnote-21397310 +Ref: Derived Files-Footnote-31397927 +Node: Future Extensions1398041 +Node: Implementation Limitations1398713 +Node: Extension Design1399955 +Node: Old Extension Problems1401119 +Ref: Old Extension Problems-Footnote-11402695 +Node: Extension New Mechanism Goals1402756 +Ref: Extension New Mechanism Goals-Footnote-11406252 +Node: Extension Other Design Decisions1406453 +Node: Extension Future Growth1408652 +Node: Notes summary1409276 +Node: Basic Concepts1410489 +Node: Basic High Level1411174 +Ref: figure-general-flow1411456 +Ref: figure-process-flow1412163 +Ref: Basic High Level-Footnote-11415564 +Node: 
Basic Data Typing1415753 +Node: Glossary1419171 +Node: Copying1452293 +Node: GNU Free Documentation License1490054 +Node: Index1515377 End Tag Table diff --git a/doc/gawk.texi b/doc/gawk.texi index 53ea283b..f1a9d79c 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -50,7 +50,7 @@ @ifnottex @set TIMES * @end ifnottex - + @c Let texinfo.tex give us full section titles @xrefautomaticsectiontitle on @@ -73,7 +73,7 @@ @set TITLE GAWK: Effective AWK Programming @end ifclear @set SUBTITLE A User's Guide for GNU Awk -@set EDITION 5.2 +@set EDITION 5.3 @iftex @set DOCUMENT book @@ -5158,6 +5158,10 @@ thus reducing the need for writing complex and tedious command lines. In particular, @code{@@include} is very useful for writing CGI scripts to be run from web pages. +The @code{@@include} directive and the @option{-i}/@option{--include} +command line option are completely equivalent. An included program +source is not loaded if it has been previously loaded. + The rules for finding a source file described in @ref{AWKPATH Variable} also apply to files loaded with @code{@@include}. @@ -5372,7 +5376,7 @@ non-option argument, even if it begins with @samp{-}. @itemize @value{MINUS} @item However, when an option itself requires an argument, and the option is separated -from that argument on the command line by at least one space, the space +from that argument on the command line by at least one space, the space is ignored, and the argument is considered to be related to the option. Thus, in the invocation, @samp{gawk -F x}, the @samp{x} is treated as belonging to the @option{-F} option, not as a separate non-option argument. @@ -6392,10 +6396,10 @@ Subject: Re: [bug-gawk] Does gawk character classes follow this? > From: arnold@skeeve.com > Date: Fri, 15 Feb 2019 03:01:34 -0700 > Cc: pengyu.ut@gmail.com, bug-gawk@gnu.org -> +> > I get the feeling that there's something really bothering you, but > I don't understand what. -> +> > Can you clarify, please? 
I thought I already did: we cannot be expected to provide a definitive @@ -9204,7 +9208,7 @@ processing on the next record @emph{right now}. For example: @{ while ((start = index($0, "/*")) != 0) @{ out = substr($0, 1, start - 1) # leading part of the string - rest = substr($0, start + 2) # ... */ ... + rest = substr($0, start + 2) # ... */ ... while ((end = index(rest, "*/")) == 0) @{ # is */ in trailing part? # get more text if (getline <= 0) @{ @@ -9830,7 +9834,7 @@ on a per-command or per-connection basis. the attempt to read from the underlying device may succeed in a later attempt. This is a limitation, and it also means that you cannot use this to multiplex input from -two or more sources. @xref{Retrying Input} for a way to enable +two or more sources. @xref{Retrying Input} for a way to enable later I/O attempts to succeed. Assigning a timeout value prevents read operations from being @@ -11868,7 +11872,7 @@ intact, as part of the string: @example $ @kbd{nawk 'BEGIN @{ print "hello, \} > @kbd{world" @}'} -@print{} hello, +@print{} hello, @print{} world @end example @@ -17223,7 +17227,7 @@ conceptually, if the element values are eight, @code{"foo"}, @ifnotdocbook @float Figure,figure-array-elements @caption{A contiguous array} -@center @image{array-elements, , , A Contiguous Array} +@center @image{gawk_array-elements, , , A Contiguous Array} @end float @end ifnotdocbook @@ -23909,7 +23913,7 @@ $ cat @kbd{test.awk} @print{} rewound = 1 @print{} rewind() @print{} @} -@print{} +@print{} @print{} @{ print FILENAME, FNR, $0 @} $ @kbd{gawk -f rewind.awk -f test.awk data } @@ -26624,7 +26628,7 @@ exist: @example @c file eg/prog/id.awk -function fill_info_for_user(user, +function fill_info_for_user(user, pwent, fields, groupnames, grent, groups, i) @{ pwent = getpwnam(user) @@ -30634,20 +30638,20 @@ using ptys can help deal with buffering deadlocks. Suppose @command{gawk} were unable to add numbers. You could use a coprocess to do it. 
Here's an exceedingly -simple program written for that purpose: +simple program written for that purpose: @example $ @kbd{cat add.c} -#include <stdio.h> - -int -main(void) -@{ - int x, y; - while (scanf("%d %d", & x, & y) == 2) - printf("%d\n", x + y); - return 0; -@} +#include <stdio.h> + +int +main(void) +@{ + int x, y; + while (scanf("%d %d", & x, & y) == 2) + printf("%d\n", x + y); + return 0; +@} $ @kbd{cc -O add.c -o add} @ii{Compile the program} @end example @@ -30660,15 +30664,15 @@ $ @kbd{echo 1 2 |} @end example And it would deadlock, because @file{add.c} fails to call -@samp{setlinebuf(stdout)}. The @command{add} program freezes. +@samp{setlinebuf(stdout)}. The @command{add} program freezes. -Now try instead: +Now try instead: @example $ @kbd{echo 1 2 |} > @kbd{gawk -v cmd=add 'BEGIN @{ PROCINFO[cmd, "pty"] = 1 @}} > @kbd{ @{ print |& cmd; cmd |& getline x; print x @}'} -@print{} 3 +@print{} 3 @end example By using a pty, @command{gawk} fools the standard I/O library into @@ -31259,7 +31263,7 @@ Terence Kelly, the author of the persistent memory allocator @command{gawk} uses, provides the following advice about the backing file: @quotation -Regarding backing file size, I recommend making it far larger +Regarding backing file size, I recommend making it far larger than all of the data that will ever reside in it, assuming that the file system supports sparse files. The ``pay only for what you use'' aspect of sparse files ensures that the @@ -31347,8 +31351,8 @@ ACM @cite{Queue} magazine, Vol. 20 No. 2 (March/April 2022), @uref{https://dl.acm.org/doi/pdf/10.1145/3534855, PDF}, @uref{https://queue.acm.org/detail.cfm?id=3534855, HTML}. This paper explains the design of the PMA -allocator used in persistent @command{gawk}. - +allocator used in persistent @command{gawk}. 
+ @item @cite{Persistent Scripting} Zi Fan Tan, Jianan Li, Haris Volos, and Terence Kelly, Non-Volatile Memory Workshop (NVMW) 2022, @@ -31360,7 +31364,7 @@ non-volatile memory; note that the interface differs slightly. @item @cite{Persistent Memory Programming on Conventional Hardware} Terence Kelly, ACM @cite{Queue} magazine Vol. 17 No. 4 (July/Aug 2019), -@uref{https://dl.acm.org/doi/pdf/10.1145/3358955.3358957, PDF}, +@uref{https://dl.acm.org/doi/pdf/10.1145/3358955.3358957, PDF}, @uref{https://queue.acm.org/detail.cfm?id=3358957, HTML}. This paper describes simple techniques for persistent memory for C/C++ code on conventional computers that lack non-volatile memory hardware. @@ -31370,8 +31374,8 @@ Terence Kelly, ACM @cite{Queue} magazine Vol. 18 No. 2 (March/April 2020), @uref{https://dl.acm.org/doi/pdf/10.1145/3400899.3400902, PDF}, @uref{https://queue.acm.org/detail.cfm?id=3400902, HTML}. -This paper describes a simple and robust testbed for testing software -against real power failures. +This paper describes a simple and robust testbed for testing software +against real power failures. @item @cite{Crashproofing the Original NoSQL Key/Value Store} Terence Kelly, @@ -35641,7 +35645,7 @@ It's Euler's modification to Newton's method for calculating pi. Take a look at lines (23) - (25) here: http://mathworld.wolfram.com/PiFormulas.htm -The algorithm I wrote simply expands the multiply by 2 and works from the innermost expression outwards. I used this to program HP calculators because it's quite easy to modify for tiny memory devices with smallish word sizes. +The algorithm I wrote simply expands the multiply by 2 and works from the innermost expression outwards. I used this to program HP calculators because it's quite easy to modify for tiny memory devices with smallish word sizes. http://www.hpmuseum.org/cgi-sys/cgiwrap/hpmuseum/articles.cgi?read=899 @@ -36011,7 +36015,7 @@ This is shown in @inlineraw{docbook, <xref linkend="figure-load-extension"/>}. 
@ifnotdocbook @float Figure,figure-load-extension @caption{Loading the extension} -@center @image{api-figure1, , , Loading the extension} +@center @image{gawk_api-figure1, , , Loading the extension} @end float @end ifnotdocbook @@ -36038,7 +36042,7 @@ This is shown in @inlineraw{docbook, <xref linkend="figure-register-new-function @ifnotdocbook @float Figure,figure-register-new-function @caption{Registering a new function} -@center @image{api-figure2, , , Registering a new Function} +@center @image{gawk_api-figure2, , , Registering a new Function} @end float @end ifnotdocbook @@ -36066,7 +36070,7 @@ This is shown in @inlineraw{docbook, <xref linkend="figure-call-new-function"/>} @ifnotdocbook @float Figure,figure-call-new-function @caption{Calling the new function} -@center @image{api-figure3, , , Calling the new function} +@center @image{gawk_api-figure3, , , Calling the new function} @end float @end ifnotdocbook @@ -36999,7 +37003,7 @@ is invoked with the @option{--version} option. @cindex customized input parser By default, @command{gawk} reads text files as its input. It uses the value -of @code{RS} to find the end of the record, and then uses @code{FS} +of @code{RS} to find the end of an input record, and then uses @code{FS} (or @code{FIELDWIDTHS} or @code{FPAT}) to split it into fields (@pxref{Reading Files}). Additionally, it sets the value of @code{RT} (@pxref{Built-in Variables}). @@ -37101,13 +37105,33 @@ are as follows: The name of the file. @item int fd; -A file descriptor for the file. If @command{gawk} was able to -open the file, then @code{fd} will @emph{not} be equal to +A file descriptor for the file. @command{gawk} attempts to open +the file for reading using the @code{open()} system call. If it was +able to open the file, then @code{fd} will @emph{not} be equal to @code{INVALID_HANDLE}. Otherwise, it will. +An extension can decide that it doesn't want to use the open file descriptor +provided by @command{gawk}. 
In such a case it can close the file and +set @code{fd} to @code{INVALID_HANDLE}, or it can leave it alone and +keep its own file descriptor in private data pointed to by the +@code{opaque} pointer (see further in this list). In any case, if +the file descriptor is valid, it should @emph{not} just overwrite the +value with something else; doing so would cause a resource leak. + @item struct stat sbuf; If the file descriptor is valid, then @command{gawk} will have filled in this structure via a call to the @code{fstat()} system call. +Otherwise, if the @code{lstat()} system call is available, it will +use that. If @code{lstat()} is not available, then it uses @code{stat()}. + +Getting the file's information allows extensions to check the type of +the file even if it could not be opened. This occurs, for example, +on Windows systems when trying to use @code{open()} on a directory. + +If @command{gawk} was not able to get the file information, then +@code{sbuf} will be zeroed out. In particular, extension code +can check if @samp{sbuf.st_mode == 0}. If that's true, then there +is no information in @code{sbuf}. @end table The @code{@var{XXX}_can_take_file()} function should examine these @@ -37142,7 +37166,7 @@ This function pointer should point to a function that creates the input records. Said function is the core of the input parser. Its behavior is described in the text following this list. -@item ssize_t (*read_func)(); +@item ssize_t (*read_func)(int, void *, size_t); This function pointer should point to a function that has the same behavior as the standard POSIX @code{read()} system call. It is an alternative to the @code{get_record} pointer. Its behavior @@ -37170,12 +37194,12 @@ input records. The parameters are as follows: @item char **out This is a pointer to a @code{char *} variable that is set to point to the record. @command{gawk} makes its own copy of the data, so -the extension must manage this storage. +your extension must manage this storage. 
@item struct awk_input *iobuf -This is the @code{awk_input_buf_t} for the file. The fields should be -used for reading data (@code{fd}) and for managing private state -(@code{opaque}), if any. +This is the @code{awk_input_buf_t} for the file. Two of its fields should +be used by your extension: @code{fd} for reading data, and @code{opaque} +for managing any private state. @item int *errcode If an error occurs, @code{*errcode} should be set to an appropriate @@ -37187,7 +37211,7 @@ If the concept of a ``record terminator'' makes sense, then @code{*rt_start} should be set to point to the data to be used for @code{RT}, and @code{*rt_len} should be set to the length of the data. Otherwise, @code{*rt_len} should be set to zero. -@command{gawk} makes its own copy of this data, so the +Here too, @command{gawk} makes its own copy of this data, so your extension must manage this storage. @item const awk_fieldwidth_info_t **field_width @@ -37198,7 +37222,9 @@ field parsing mechanism. Note that this structure will not be copied by @command{gawk}; it must persist at least until the next call to @code{get_record} or @code{close_func}. Note also that @code{field_width} is @code{NULL} when @code{getline} is assigning the results to a variable, thus -field parsing is not needed. If the parser does set @code{*field_width}, +field parsing is not needed. + +If the parser sets @code{*field_width}, then @command{gawk} uses this layout to parse the input record, and the @code{PROCINFO["FS"]} value will be @code{"API"} while this record is active in @code{$0}. @@ -37252,15 +37278,7 @@ based upon the value of an @command{awk} variable, as the XML extension from the @code{gawkextlib} project does (@pxref{gawkextlib}). In the latter case, code in a @code{BEGINFILE} rule can look at @code{FILENAME} and @code{ERRNO} to decide whether or -not to activate an input parser (@pxref{BEGINFILE/ENDFILE}). 
- -You register your input parser with the following function: - -@table @code -@item void register_input_parser(awk_input_parser_t *input_parser); -Register the input parser pointed to by @code{input_parser} with -@command{gawk}. -@end table +not to activate your input parser (@pxref{BEGINFILE/ENDFILE}). If you would like to override the default field parsing mechanism for a given record, then you must populate an @code{awk_fieldwidth_info_t} structure, @@ -37285,7 +37303,7 @@ Set this to @code{awk_true} if the field lengths are specified in terms of potentially multi-byte characters, and set it to @code{awk_false} if the lengths are in terms of bytes. Performance will be better if the values are supplied in -terms of bytes. +terms of bytes. @item size_t nf; Set this to the number of fields in the input record, i.e. @code{NF}. @@ -37300,12 +37318,20 @@ for @code{$1}, and so on through the @code{fields[nf-1]} element containing the @end table A convenience macro @code{awk_fieldwidth_info_size(numfields)} is provided to -calculate the appropriate size of a variable-length +calculate the appropriate size of a variable-length @code{awk_fieldwidth_info_t} structure containing @code{numfields} fields. This can be used as an argument to @code{malloc()} or in a union to allocate space statically. Please refer to the @code{readdir_test} sample extension for an example. +You register your input parser with the following function: + +@table @code +@item void register_input_parser(awk_input_parser_t *input_parser); +Register the input parser pointed to by @code{input_parser} with +@command{gawk}. +@end table + @node Output Wrappers @subsubsection Customized Output Wrappers @cindex customized output wrapper @@ -37409,10 +37435,12 @@ what it does. The @code{@var{XXX}_can_take_file()} function should make a decision based upon the @code{name} and @code{mode} fields, and any additional state (such as @command{awk} variable values) that is appropriate. 
+@command{gawk} attempts to open the named file for writing. The @code{fp} +member will be @code{NULL} only if it fails. When @command{gawk} calls @code{@var{XXX}_take_control_of()}, that function should fill in the other fields as appropriate, except for @code{fp}, which it should just -use normally. +use normally if it's not @code{NULL}. You register your output wrapper with the following function: @@ -38667,7 +38695,7 @@ The following function allows extensions to access and manipulate redirections. Look up file @code{name} in @command{gawk}'s internal redirection table. If @code{name} is @code{NULL} or @code{name_len} is zero, return data for the currently open input file corresponding to @code{FILENAME}. -(This does not access the @code{filetype} argument, so that may be undefined). +(This does not access the @code{filetype} argument, so that may be undefined). If the file is not already open, attempt to open it. The @code{filetype} argument must be zero-terminated and should be one of: @@ -40034,22 +40062,22 @@ all the variables and functions in the @code{inplace} namespace @c endfile @ignore @c file eg/lib/inplace.awk -# +# # Copyright (C) 2013, 2017, 2019 the Free Software Foundation, Inc. -# +# # This file is part of GAWK, the GNU implementation of the # AWK Programming Language. -# +# # GAWK is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. -# +# # GAWK is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
-# +# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA @@ -42016,6 +42044,10 @@ Redirected @code{getline} became allowed inside (@pxref{BEGINFILE/ENDFILE}). @item +Support for nonfatal I/O +(@pxref{Nonfatal}). + +@item The @code{where} command was added to the debugger (@pxref{Execution Stack}). @@ -44639,7 +44671,7 @@ This is an @command{awk} interpreter written in the @uref{https://golang.org/, Go programming language}. It implements POSIX @command{awk}, with a few minor extensions. Source code is available from @uref{https://github.com/benhoyt/goawk}. -The author wrote a nice +The author wrote a nice @uref{https://benhoyt.com/writings/goawk/, article} describing the implementation. @@ -45767,7 +45799,7 @@ See @inlineraw{docbook, <xref linkend="figure-general-flow"/>}. @ifnotdocbook @float Figure,figure-general-flow @caption{General Program Flow} -@center @image{general-program, , , General program flow} +@center @image{gawk_general-program, , , General program flow} @end float @end ifnotdocbook @@ -45805,7 +45837,7 @@ as shown in @inlineraw{docbook, <xref linkend="figure-process-flow"/>}: @ifnotdocbook @float Figure,figure-process-flow @caption{Basic Program Steps} -@center @image{process-flow, , , Basic Program Stages} +@center @image{gawk_process-flow, , , Basic Program Stages} @end float @end ifnotdocbook diff --git a/doc/api-figure1.eps b/doc/gawk_api-figure1.eps index 7af094c9..7af094c9 100644 --- a/doc/api-figure1.eps +++ b/doc/gawk_api-figure1.eps diff --git a/doc/api-figure1.fig b/doc/gawk_api-figure1.fig index 7bc47846..7bc47846 100644 --- a/doc/api-figure1.fig +++ b/doc/gawk_api-figure1.fig diff --git a/doc/api-figure1.pdf b/doc/gawk_api-figure1.pdf Binary files differindex 0c24b67d..0c24b67d 100644 --- a/doc/api-figure1.pdf +++ b/doc/gawk_api-figure1.pdf diff --git a/doc/api-figure1.png 
b/doc/gawk_api-figure1.png Binary files differ index 72d552cd..72d552cd 100644 --- a/doc/api-figure1.png +++ b/doc/gawk_api-figure1.png diff --git a/doc/api-figure1.txt b/doc/gawk_api-figure1.txt index 686b853b..686b853b 100644 --- a/doc/api-figure1.txt +++ b/doc/gawk_api-figure1.txt diff --git a/doc/api-figure2.eps b/doc/gawk_api-figure2.eps index caf5c34c..caf5c34c 100644 --- a/doc/api-figure2.eps +++ b/doc/gawk_api-figure2.eps diff --git a/doc/api-figure2.fig b/doc/gawk_api-figure2.fig index 2ae60854..2ae60854 100644 --- a/doc/api-figure2.fig +++ b/doc/gawk_api-figure2.fig diff --git a/doc/api-figure2.pdf b/doc/gawk_api-figure2.pdf Binary files differ index 20462856..20462856 100644 --- a/doc/api-figure2.pdf +++ b/doc/gawk_api-figure2.pdf diff --git a/doc/api-figure2.png b/doc/gawk_api-figure2.png Binary files differ index a6e28c98..a6e28c98 100644 --- a/doc/api-figure2.png +++ b/doc/gawk_api-figure2.png diff --git a/doc/api-figure2.txt b/doc/gawk_api-figure2.txt index 5ed8e2a8..5ed8e2a8 100644 --- a/doc/api-figure2.txt +++ b/doc/gawk_api-figure2.txt diff --git a/doc/api-figure3.eps b/doc/gawk_api-figure3.eps index d713575f..d713575f 100644 --- a/doc/api-figure3.eps +++ b/doc/gawk_api-figure3.eps diff --git a/doc/api-figure3.fig b/doc/gawk_api-figure3.fig index 5c7fdd97..5c7fdd97 100644 --- a/doc/api-figure3.fig +++ b/doc/gawk_api-figure3.fig diff --git a/doc/api-figure3.pdf b/doc/gawk_api-figure3.pdf Binary files differ index 517b2ecc..517b2ecc 100644 --- a/doc/api-figure3.pdf +++ b/doc/gawk_api-figure3.pdf diff --git a/doc/api-figure3.png b/doc/gawk_api-figure3.png Binary files differ index f7db0794..f7db0794 100644 --- a/doc/api-figure3.png +++ b/doc/gawk_api-figure3.png diff --git a/doc/api-figure3.txt b/doc/gawk_api-figure3.txt index a601ce94..a601ce94 100644 --- a/doc/api-figure3.txt +++ b/doc/gawk_api-figure3.txt diff --git a/doc/array-elements.eps b/doc/gawk_array-elements.eps index 041c0b39..041c0b39 100644 --- a/doc/array-elements.eps +++ 
b/doc/gawk_array-elements.eps diff --git a/doc/array-elements.fig b/doc/gawk_array-elements.fig index 63b5ffbf..63b5ffbf 100644 --- a/doc/array-elements.fig +++ b/doc/gawk_array-elements.fig diff --git a/doc/array-elements.pdf b/doc/gawk_array-elements.pdf Binary files differ index 328cbd1a..328cbd1a 100644 --- a/doc/array-elements.pdf +++ b/doc/gawk_array-elements.pdf diff --git a/doc/array-elements.png b/doc/gawk_array-elements.png Binary files differ index b57d66b7..b57d66b7 100644 --- a/doc/array-elements.png +++ b/doc/gawk_array-elements.png diff --git a/doc/array-elements.txt b/doc/gawk_array-elements.txt index 424c1708..424c1708 100644 --- a/doc/array-elements.txt +++ b/doc/gawk_array-elements.txt diff --git a/doc/general-program.eps b/doc/gawk_general-program.eps index 12497168..12497168 100644 --- a/doc/general-program.eps +++ b/doc/gawk_general-program.eps diff --git a/doc/general-program.fig b/doc/gawk_general-program.fig index 8ab42f3d..8ab42f3d 100644 --- a/doc/general-program.fig +++ b/doc/gawk_general-program.fig diff --git a/doc/general-program.pdf b/doc/gawk_general-program.pdf Binary files differ index f4f7572c..f4f7572c 100644 --- a/doc/general-program.pdf +++ b/doc/gawk_general-program.pdf diff --git a/doc/general-program.png b/doc/gawk_general-program.png Binary files differ index 7737261f..7737261f 100644 --- a/doc/general-program.png +++ b/doc/gawk_general-program.png diff --git a/doc/general-program.txt b/doc/gawk_general-program.txt index cb85c294..cb85c294 100644 --- a/doc/general-program.txt +++ b/doc/gawk_general-program.txt diff --git a/doc/process-flow.eps b/doc/gawk_process-flow.eps index 81b937ba..81b937ba 100644 --- a/doc/process-flow.eps +++ b/doc/gawk_process-flow.eps diff --git a/doc/process-flow.fig b/doc/gawk_process-flow.fig index b6613137..b6613137 100644 --- a/doc/process-flow.fig +++ b/doc/gawk_process-flow.fig diff --git a/doc/process-flow.pdf b/doc/gawk_process-flow.pdf Binary files differ index 3ff5f9e8..3ff5f9e8 100644 --- 
a/doc/process-flow.pdf +++ b/doc/gawk_process-flow.pdf diff --git a/doc/process-flow.png b/doc/gawk_process-flow.png Binary files differindex 97f467f4..97f467f4 100644 --- a/doc/process-flow.png +++ b/doc/gawk_process-flow.png diff --git a/doc/process-flow.txt b/doc/gawk_process-flow.txt index d7296385..d7296385 100644 --- a/doc/process-flow.txt +++ b/doc/gawk_process-flow.txt diff --git a/doc/statist.eps b/doc/gawk_statist.eps index 1e1f09fa..1e1f09fa 100644 --- a/doc/statist.eps +++ b/doc/gawk_statist.eps diff --git a/doc/statist.jpg b/doc/gawk_statist.jpg Binary files differindex 92428f25..92428f25 100644 --- a/doc/statist.jpg +++ b/doc/gawk_statist.jpg diff --git a/doc/statist.pdf b/doc/gawk_statist.pdf Binary files differindex 2122a33a..2122a33a 100644 --- a/doc/statist.pdf +++ b/doc/gawk_statist.pdf diff --git a/doc/statist.txt b/doc/gawk_statist.txt index 261019d6..261019d6 100644 --- a/doc/statist.txt +++ b/doc/gawk_statist.txt diff --git a/doc/gawkinet.info b/doc/gawkinet.info index 1f22414e..c961462f 100644 --- a/doc/gawkinet.info +++ b/doc/gawkinet.info @@ -1,7 +1,7 @@ -This is gawkinet.info, produced by makeinfo version 6.8 from +This is gawkinet.info, produced by makeinfo version 7.0.1 from gawkinet.texi. -This is Edition 1.6 of 'TCP/IP Internetworking with 'gawk'', for the +This is Edition 1.6 of ‘TCP/IP Internetworking with ‘gawk’’, for the 5.2.0 (or later) version of the GNU implementation of AWK. @@ -12,19 +12,19 @@ This is Edition 1.6 of 'TCP/IP Internetworking with 'gawk'', for the Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with the -Invariant Sections being "GNU General Public License", the Front-Cover +Invariant Sections being “GNU General Public License”, the Front-Cover texts being (a) (see below), and with the Back-Cover Texts being (b) (see below). 
A copy of the license is included in the section entitled -"GNU Free Documentation License". +“GNU Free Documentation License”. - a. "A GNU Manual" + a. “A GNU Manual” - b. "You have the freedom to copy and modify this GNU manual. Buying + b. “You have the freedom to copy and modify this GNU manual. Buying copies from the FSF supports it in developing GNU and promoting - software freedom." + software freedom.” INFO-DIR-SECTION Network applications START-INFO-DIR-ENTRY -* awkinet: (gawkinet). TCP/IP Internetworking With 'gawk'. +* awkinet: (gawkinet). TCP/IP Internetworking With ‘gawk’. END-INFO-DIR-ENTRY @@ -33,10 +33,10 @@ File: gawkinet.info, Node: Top, Next: Preface, Prev: (dir), Up: (dir) General Introduction ******************** -This file documents the networking features in GNU Awk ('gawk') version +This file documents the networking features in GNU Awk (‘gawk’) version 4.0 and later. - This is Edition 1.6 of 'TCP/IP Internetworking with 'gawk'', for the + This is Edition 1.6 of ‘TCP/IP Internetworking with ‘gawk’’, for the 5.2.0 (or later) version of the GNU implementation of AWK. @@ -47,16 +47,16 @@ This file documents the networking features in GNU Awk ('gawk') version Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with the -Invariant Sections being "GNU General Public License", the Front-Cover +Invariant Sections being “GNU General Public License”, the Front-Cover texts being (a) (see below), and with the Back-Cover Texts being (b) (see below). A copy of the license is included in the section entitled -"GNU Free Documentation License". +“GNU Free Documentation License”. - a. "A GNU Manual" + a. “A GNU Manual” - b. "You have the freedom to copy and modify this GNU manual. Buying + b. “You have the freedom to copy and modify this GNU manual. 
Buying copies from the FSF supports it in developing GNU and promoting - software freedom." + software freedom.” * Menu: @@ -75,7 +75,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b) * Basic Protocols:: The basic protocols. * Ports:: The idea behind ports. * Making Connections:: Making TCP/IP connections. -* Gawk Special Files:: How to do 'gawk' networking. +* Gawk Special Files:: How to do ‘gawk’ networking. * Special File Fields:: The fields in the special file name. * Comparing Protocols:: Differences between the protocols. * File /inet/tcp:: The TCP special file. @@ -110,28 +110,28 @@ Preface ******* In May of 1997, Jürgen Kahrs felt the need for network access from -'awk', and, with a little help from me, set about adding features to do -this for 'gawk'. At that time, he wrote the bulk of this Info file. +‘awk’, and, with a little help from me, set about adding features to do +this for ‘gawk’. At that time, he wrote the bulk of this Info file. - The code and documentation were added to the 'gawk' 3.1 development + The code and documentation were added to the ‘gawk’ 3.1 development tree, and languished somewhat until I could finally get down to some -serious work on that version of 'gawk'. This finally happened in the +serious work on that version of ‘gawk’. This finally happened in the middle of 2000. Meantime, Jürgen wrote an article about the Internet special files -and '|&' operator for 'Linux Journal', and made a networking patch for -the production versions of 'gawk' available from his home page. In -August of 2000 (for 'gawk' 3.0.6), this patch also made it to the main -GNU 'ftp' distribution site. +and ‘|&’ operator for ‘Linux Journal’, and made a networking patch for +the production versions of ‘gawk’ available from his home page. In +August of 2000 (for ‘gawk’ 3.0.6), this patch also made it to the main +GNU ‘ftp’ distribution site. 
- For release with 'gawk', I edited Jürgen's prose for English grammar + For release with ‘gawk’, I edited Jürgen’s prose for English grammar and style, as he is not a native English speaker. I also rearranged the material somewhat for what I felt was a better order of presentation, and (re)wrote some of the introductory material. The majority of this document and the code are his work, and the high quality and interesting ideas speak for themselves. It is my hope that -these features will be of significant value to the 'awk' community. +these features will be of significant value to the ‘awk’ community. Arnold Robbins @@ -145,7 +145,7 @@ File: gawkinet.info, Node: Introduction, Next: Using Networking, Prev: Prefac ********************* This major node provides a (necessarily) brief introduction to computer -networking concepts. For many applications of 'gawk' to TCP/IP +networking concepts. For many applications of ‘gawk’ to TCP/IP networking, we hope that this is enough. For more advanced tasks, you will need deeper background, and it may be necessary to switch to lower-level programming in C or C++. @@ -180,7 +180,7 @@ When you make a phone call, the following steps occur: network, refuses to answer the call. 4. Assuming the other party answers, the connection between you is now - a "duplex" (two-way), "reliable" (no data lost), sequenced (data + a “duplex” (two-way), “reliable” (no data lost), sequenced (data comes out in the order sent) data stream. 5. You and your friend may now talk freely, with the phone system @@ -190,7 +190,7 @@ When you make a phone call, the following steps occur: The same steps occur in a duplex reliable computer networking connection. There is considerably more overhead in setting up the -communications, but once it's done, data moves in both directions, +communications, but once it’s done, data moves in both directions, reliably, in sequence. @@ -215,21 +215,21 @@ following. 5. One or more may get lost in the mail. 
(Although, fortunately, this does not occur very often.) - 6. In a computer network, one or more "packets" may also arrive - multiple times. (This doesn't happen with the postal system!) + 6. In a computer network, one or more “packets” may also arrive + multiple times. (This doesn’t happen with the postal system!) The important characteristics of datagram communications, like those of the postal system are thus: - * Delivery is "best effort;" the data may never get there. + • Delivery is “best effort;” the data may never get there. - * Each message is self-contained, including the source and + • Each message is self-contained, including the source and destination addresses. - * Delivery is _not_ sequenced; packets may arrive out of order, + • Delivery is _not_ sequenced; packets may arrive out of order, and/or multiple times. - * Unlike the phone system, overhead is considerably lower. It is not + • Unlike the phone system, overhead is considerably lower. It is not necessary to set up the call first. The price the user pays for the lower overhead of datagram @@ -245,7 +245,7 @@ File: gawkinet.info, Node: The TCP/IP Protocols, Next: Making Connections, Pr The Internet Protocol Suite (usually referred to as just TCP/IP)(1) consists of a number of different protocols at different levels or -"layers." For our purposes, three protocols provide the fundamental +“layers.” For our purposes, three protocols provide the fundamental communications mechanisms. All other defined protocols are referred to as user-level protocols (e.g., HTTP, used later in this Info file). @@ -269,8 +269,8 @@ File: gawkinet.info, Node: Basic Protocols, Next: Ports, Prev: The TCP/IP Pro IP The Internet Protocol. This protocol is almost never used directly by applications. It provides the basic packet delivery and routing - infrastructure of the Internet. Much like the phone company's - switching centers or the Post Office's trucks, it is not of much + infrastructure of the Internet. 
Much like the phone company’s + switching centers or the Post Office’s trucks, it is not of much day-to-day interest to the regular user (or programmer). It happens to be a best effort datagram protocol. In the early twenty-first century, there are two versions of this protocol in @@ -281,11 +281,11 @@ IP addresses, on which most of the current Internet is based. IPv6 - The "next generation" of the Internet Protocol, with 128-bit + The “next generation” of the Internet Protocol, with 128-bit addresses. This protocol is in wide use in certain parts of the world, but has not yet replaced IPv4.(1) - Versions of the other protocols that sit "atop" IP exist for both + Versions of the other protocols that sit “atop” IP exist for both IPv4 and IPv6. However, as the IPv6 versions are fundamentally the same as the original IPv4 versions, we will not distinguish further between them. @@ -293,14 +293,14 @@ IP UDP The User Datagram Protocol. This is a best effort datagram protocol. It provides a small amount of extra reliability over IP, - and adds the notion of "ports", described in *note TCP and UDP + and adds the notion of “ports”, described in *note TCP and UDP Ports: Ports. TCP The Transmission Control Protocol. This is a duplex, reliable, sequenced byte-stream protocol, again layered on top of IP, and also providing the notion of ports. This is the protocol that you - will most likely use when using 'gawk' for network programming. + will most likely use when using ‘gawk’ for network programming. All other user-level protocols use either TCP or UDP to do their basic communications. Examples are SMTP (Simple Mail Transfer @@ -309,7 +309,7 @@ Protocol). ---------- Footnotes ---------- - (1) There isn't an IPv5. + (1) There isn’t an IPv5. File: gawkinet.info, Node: Ports, Prev: Basic Protocols, Up: The TCP/IP Protocols @@ -323,20 +323,20 @@ than one person at the location; thus you have to further quantify the recipient by putting a person or company name on the envelope. 
In the phone system, one phone number may represent an entire -company, in which case you need a person's extension number in order to +company, in which case you need a person’s extension number in order to reach that individual directly. Or, when you call a home, you have to -say, "May I please speak to ..." before talking to the person directly. +say, “May I please speak to ...” before talking to the person directly. IP networking provides the concept of addressing. An IP address represents a particular computer, but no more. In order to reach the mail service on a system, or the FTP or WWW service on a system, you must have some way to further specify which service you want. In the -Internet Protocol suite, this is done with "port numbers", which +Internet Protocol suite, this is done with “port numbers”, which represent the services, much like an extension number used with a phone number. Port numbers are 16-bit integers. Unix and Unix-like systems reserve -ports below 1024 for "well known" services, such as SMTP, FTP, and HTTP. +ports below 1024 for “well known” services, such as SMTP, FTP, and HTTP. Numbers 1024 and above may be used by any application, although there is no promise made that a particular port number is always available. @@ -346,22 +346,22 @@ File: gawkinet.info, Node: Making Connections, Prev: The TCP/IP Protocols, Up 1.4 Making TCP/IP Connections (And Some Terminology) ==================================================== -Two terms come up repeatedly when discussing networking: "client" and -"server". For now, we'll discuss these terms at the "connection level", +Two terms come up repeatedly when discussing networking: “client” and +“server”. For now, we’ll discuss these terms at the “connection level”, when first establishing connections between two processes on different systems over a network. 
(Once the connection is established, the higher -level, or "application level" protocols, such as HTTP or FTP, determine +level, or “application level” protocols, such as HTTP or FTP, determine who is the client and who is the server. Often, it turns out that the client and server are the same in both roles.) - The "server" is the system providing the service, such as the web -server or email server. It is the "host" (system) which is _connected + The “server” is the system providing the service, such as the web +server or email server. It is the “host” (system) which is _connected to_ in a transaction. For this to work though, the server must be expecting connections. Much as there has to be someone at the office building to answer the phone,(1) the server process (usually) has to be started first and be waiting for a connection. - The "client" is the system requesting the service. It is the system + The “client” is the system requesting the service. It is the system _initiating the connection_ in a transaction. (Just as when you pick up the phone to call an office or store.) @@ -373,19 +373,19 @@ can a new one be built up on the same port. This is contrary to the usual behavior of fully developed web servers which have to avoid situations in which they are not reachable. We have to pay this price in order to enjoy the benefits of a simple communication paradigm in -'gawk'.) +‘gawk’.) Furthermore, once the connection is established, communications are -"synchronous".(2) I.e., each end waits on the other to finish +“synchronous”.(2) I.e., each end waits on the other to finish transmitting, before replying. This is much like two people in a phone conversation. While both could talk simultaneously, doing so usually -doesn't work too well. +doesn’t work too well. In the case of TCP, the synchronicity is enforced by the protocol -when sending data. Data writes "block" until the data have been +when sending data. 
Data writes “block” until the data have been received on the other end. For both TCP and UDP, data reads block until there is incoming data waiting to be read. This is summarized in the -following table, where an "x" indicates that the given action blocks. +following table, where an “x” indicates that the given action blocks. TCP x x UDP x @@ -394,33 +394,33 @@ UDP x (1) In the days before voice mail systems! - (2) For the technically savvy, data reads block--if there's no + (2) For the technically savvy, data reads block—if there’s no incoming data, the program is made to wait until there is, instead of -receiving a "there's no data" error return. +receiving a “there’s no data” error return. File: gawkinet.info, Node: Using Networking, Next: Some Applications and Techniques, Prev: Introduction, Up: Top -2 Networking With 'gawk' +2 Networking With ‘gawk’ ************************ -The 'awk' programming language was originally developed as a +The ‘awk’ programming language was originally developed as a pattern-matching language for writing short programs to perform data -manipulation tasks. 'awk''s strength is the manipulation of textual +manipulation tasks. ‘awk’’s strength is the manipulation of textual data that is stored in files. It was never meant to be used for networking purposes. To exploit its features in a networking context, -it's necessary to use an access mode for network connections that +it’s necessary to use an access mode for network connections that resembles the access of files as closely as possible. - 'awk' is also meant to be a prototyping language. It is used to + ‘awk’ is also meant to be a prototyping language. It is used to demonstrate feasibility and to play with features and user interfaces. -This can be done with file-like handling of network connections. 'gawk' +This can be done with file-like handling of network connections. 
‘gawk’ trades the lack of many of the advanced features of the TCP/IP family of protocols for the convenience of simple connection handling. The advanced features are available when programming in C or Perl. In fact, the network programming in this major node is very similar to what is -described in books such as 'Internet Programming with Python', 'Advanced -Perl Programming', or 'Web Client Programming with Perl'. +described in books such as ‘Internet Programming with Python’, ‘Advanced +Perl Programming’, or ‘Web Client Programming with Perl’. However, you can do the programming here without first having to learn object-oriented ideology; underlying languages such as Tcl/Tk, @@ -432,7 +432,7 @@ protocol is much less important for most users. * Menu: -* Gawk Special Files:: How to do 'gawk' networking. +* Gawk Special Files:: How to do ‘gawk’ networking. * TCP Connecting:: Making a TCP connection. * Troubleshooting:: Troubleshooting TCP/IP connections. * Interacting:: Interacting with a service. @@ -448,30 +448,30 @@ protocol is much less important for most users. File: gawkinet.info, Node: Gawk Special Files, Next: TCP Connecting, Prev: Using Networking, Up: Using Networking -2.1 'gawk''s Networking Mechanisms +2.1 ‘gawk’’s Networking Mechanisms ================================== -The '|&' operator for use in communicating with a "coprocess" is +The ‘|&’ operator for use in communicating with a “coprocess” is described in *note Two-way Communications With Another Process: (gawk)Two-way I/O. It shows how to do two-way I/O to a separate process, -sending it data with 'print' or 'printf' and reading data with -'getline'. If you haven't read it already, you should detour there to +sending it data with ‘print’ or ‘printf’ and reading data with +‘getline’. If you haven’t read it already, you should detour there to do so. - 'gawk' transparently extends the two-way I/O mechanism to simple -networking through the use of special file names. 
When a "coprocess" + ‘gawk’ transparently extends the two-way I/O mechanism to simple +networking through the use of special file names. When a “coprocess” that matches the special files we are about to describe is started, -'gawk' creates the appropriate network connection, and then two-way I/O +‘gawk’ creates the appropriate network connection, and then two-way I/O proceeds as usual. At the C, C++, and Perl level, networking is accomplished via -"sockets", an Application Programming Interface (API) originally +“sockets”, an Application Programming Interface (API) originally developed at the University of California at Berkeley that is now used almost universally for TCP/IP networking. Socket level programming, while fairly straightforward, requires paying attention to a number of details, as well as using binary data. It is not well-suited for use -from a high-level language like 'awk'. The special files provided in -'gawk' hide the details from the programmer, making things much simpler +from a high-level language like ‘awk’. The special files provided in +‘gawk’ hide the details from the programmer, making things much simpler and easier to use. The special file name for network access is made up of several @@ -495,39 +495,39 @@ File: gawkinet.info, Node: Special File Fields, Next: Comparing Protocols, Pr This node explains the meaning of all of the fields, as well as the range of values and the defaults. All of the fields are mandatory. To -let the system pick a value, or if the field doesn't apply to the -protocol, specify it as '0' (zero): +let the system pick a value, or if the field doesn’t apply to the +protocol, specify it as ‘0’ (zero): NET-TYPE - This is one of 'inet4' for IPv4, 'inet6' for IPv6, or 'inet' to use + This is one of ‘inet4’ for IPv4, ‘inet6’ for IPv6, or ‘inet’ to use the system default (which is likely to be IPv4). For the rest of - this document, we will use the generic '/inet' in our descriptions - of how 'gawk''s networking works. 
+ this document, we will use the generic ‘/inet’ in our descriptions + of how ‘gawk’’s networking works. PROTOCOL Determines which member of the TCP/IP family of protocols is selected to transport the data across the network. There are two - possible values (always written in lowercase): 'tcp' and 'udp'. + possible values (always written in lowercase): ‘tcp’ and ‘udp’. The exact meaning of each is explained later in this node. LOCALPORT Determines which port on the local machine is used to communicate - across the network. Application-level clients usually use '0' to - indicate they do not care which local port is used--instead they + across the network. Application-level clients usually use ‘0’ to + indicate they do not care which local port is used—instead they specify a remote port to connect to. It is vital for application-level servers to use a number different - from '0' here because their service has to be available at a + from ‘0’ here because their service has to be available at a specific publicly known port number. It is possible to use a name - from '/etc/services' here. + from ‘/etc/services’ here. HOSTNAME Determines which remote host is to be at the other end of the connection. Application-level clients must enter a name different - from '0'. The name can be either symbolic (e.g., - 'jpl-devvax.jpl.nasa.gov') or numeric (e.g., '128.149.1.143'). + from ‘0’. The name can be either symbolic (e.g., + ‘jpl-devvax.jpl.nasa.gov’) or numeric (e.g., ‘128.149.1.143’). - Application-level servers must fill this field with a '0' to + Application-level servers must fill this field with a ‘0’ to indicate their being open for all other hosts to connect to them and enforce connection level server behavior this way. It is not possible for an application-level server to restrict its @@ -535,19 +535,19 @@ HOSTNAME REMOTEPORT Determines which port on the remote machine is used to communicate - across the network. 
For '/inet/tcp' and '/inet/udp', - application-level clients _must_ use a number other than '0' to + across the network. For ‘/inet/tcp’ and ‘/inet/udp’, + application-level clients _must_ use a number other than ‘0’ to indicate to which port on the remote machine they want to connect. - Application-level servers must not fill this field with a '0'. + Application-level servers must not fill this field with a ‘0’. Instead they specify a local port to which clients connect. It is - possible to use a name from '/etc/services' here. + possible to use a name from ‘/etc/services’ here. Experts in network programming will notice that the usual client/server asymmetry found at the level of the socket API is not visible here. This is for the sake of simplicity of the high-level concept. If this asymmetry is necessary for your application, use -another language. For 'gawk', it is more important to enable users to +another language. For ‘gawk’, it is more important to enable users to write a client program with a minimum of code. What happens when first accessing a network connection is seen in the following pseudocode: @@ -567,7 +567,7 @@ accessing a network connection is seen in the following pseudocode: fields of the special file name. When in doubt, *note Table 2.1: table-inet-components. gives you the combinations of values and their meaning. If this table is too complicated, focus on the three lines -printed in *bold*. All the examples in *note Networking With 'gawk': +printed in *bold*. All the examples in *note Networking With ‘gawk’: Using Networking, use only the patterns printed in bold letters. @@ -590,7 +590,7 @@ tcp, udp x 0 x Invalid tcp, udp 0 0 0 Invalid tcp, udp 0 x 0 Invalid -Table 2.1: '/inet' Special File Components +Table 2.1: ‘/inet’ Special File Components In general, TCP is the preferred mechanism to use. It is the simplest protocol to understand and to use. Use UDP only if @@ -615,7 +615,7 @@ available and demonstrate the differences between them. 
File: gawkinet.info, Node: File /inet/tcp, Next: File /inet/udp, Prev: Comparing Protocols, Up: Comparing Protocols -2.1.2.1 '/inet/tcp' +2.1.2.1 ‘/inet/tcp’ ................... Once again, always use TCP. (Use UDP when low overhead is a necessity.) @@ -646,7 +646,7 @@ started first, and it waits for the receiver to read a line. File: gawkinet.info, Node: File /inet/udp, Prev: File /inet/tcp, Up: Comparing Protocols -2.1.2.2 '/inet/udp' +2.1.2.2 ‘/inet/udp’ ................... The server and client programs that use UDP are almost identical to @@ -671,13 +671,13 @@ started first: close("/inet/udp/0/localhost/8888") } - In the case of UDP, the initial 'print' command is the one that -actually sends data so that there is a connection. UDP and "connection" + In the case of UDP, the initial ‘print’ command is the one that +actually sends data so that there is a connection. UDP and “connection” sound strange to anyone who has learned that UDP is a connectionless -protocol. Here, "connection" means that the 'connect()' system call has -completed its work and completed the "association" between a certain +protocol. Here, “connection” means that the ‘connect()’ system call has +completed its work and completed the “association” between a certain socket and an IP address. Thus there are subtle differences between -'connect()' for TCP and UDP; see the man page for details.(1) +‘connect()’ for TCP and UDP; see the man page for details.(1) UDP cannot guarantee that the datagrams at the receiving end will arrive in exactly the same order they were sent. Some datagrams could @@ -689,7 +689,7 @@ stateless services like the original versions of NFS. ---------- Footnotes ---------- (1) This subtlety is just one of many details that are hidden in the -socket API, invisible and intractable for the 'gawk' user. The +socket API, invisible and intractable for the ‘gawk’ user.
The developers are currently considering how to rework the network facilities to make them easier to understand and use. @@ -699,9 +699,9 @@ File: gawkinet.info, Node: TCP Connecting, Next: Troubleshooting, Prev: Gawk 2.2 Establishing a TCP Connection ================================= -Let's observe a network connection at work. Type in the following +Let’s observe a network connection at work. Type in the following program and watch the output. Within a second, it connects via TCP -('/inet/tcp') to a remote server and asks the service 'daytime' on the +(‘/inet/tcp’) to a remote server and asks the service ‘daytime’ on the machine what time it is: BEGIN { @@ -714,43 +714,43 @@ machine what time it is: close(daytime_connection) } - Even experienced 'awk' users will find the fourth and sixth line + Even experienced ‘awk’ users will find the fourth and sixth line strange in two respects: - * A string containing the name of a special file is used as a shell - command that pipes its output into 'getline'. One would rather + • A string containing the name of a special file is used as a shell + command that pipes its output into ‘getline’. One would rather expect to see the special file being read like any other file - ('getline < "/inet/tcp/0/time-a-g.nist.gov/daytime"'). + (‘getline < "/inet/tcp/0/time-a-g.nist.gov/daytime"’). - * The operator '|&' has not been part of any 'awk' implementation - (until now). It is actually the only extension of the 'awk' + • The operator ‘|&’ has not been part of any ‘awk’ implementation + (until now). It is actually the only extension of the ‘awk’ language needed (apart from the special files) to introduce network access. - The '|&' operator was introduced in 'gawk' 3.1 in order to overcome -the crucial restriction that access to files and pipes in 'awk' is + The ‘|&’ operator was introduced in ‘gawk’ 3.1 in order to overcome +the crucial restriction that access to files and pipes in ‘awk’ is always unidirectional. 
It was formerly impossible to use both access modes on the same file or pipe. Instead of changing the whole concept -of file access, the '|&' operator behaves exactly like the usual pipe +of file access, the ‘|&’ operator behaves exactly like the usual pipe operator except for two additions: - * Normal shell commands connected to their 'gawk' program with a '|&' - pipe can be accessed bidirectionally. The '|&' turns out to be a - quite general, useful, and natural extension of 'awk'. + • Normal shell commands connected to their ‘gawk’ program with a ‘|&’ + pipe can be accessed bidirectionally. The ‘|&’ turns out to be a + quite general, useful, and natural extension of ‘awk’. - * Pipes that consist of a special file name for network connections + • Pipes that consist of a special file name for network connections are not executed as shell commands. Instead, they can be read and written to, just like a full-duplex network connection. - In the earlier example, the '|&' operator tells 'getline' to read a -line from the special file '/inet/tcp/0/time-a-g.nist.gov/daytime'. We + In the earlier example, the ‘|&’ operator tells ‘getline’ to read a +line from the special file ‘/inet/tcp/0/time-a-g.nist.gov/daytime’. We could also have printed a line into the special file. But instead we just consumed an empty leading line, printed it, then read a line with the time, printed that, and closed the connection. (While we could just -let 'gawk' close the connection by finishing the program, in this Info +let ‘gawk’ close the connection by finishing the program, in this Info file we are pedantic and always explicitly close the connections.) - Network services like 'daytime' are not really useful because there + Network services like ‘daytime’ are not really useful because there are so many better ways to print the current time. In the early days of TCP networking, such a service may have looked like a good idea for testing purposes. 
Later, simple TCP services like these have been used @@ -760,7 +760,7 @@ services. The list of servers (https://tf.nist.gov/tf-cgi/servers.cgi) that still support the legacy service daytime (https://en.wikipedia.org/wiki/Daytime_Protocol) can be found at Wikipedia. We hesitated to use this service in this manual because it -is hard to find servers that still support services like 'daytime' +is hard to find servers that still support services like ‘daytime’ openly to the Internet. Later on we will see that some of these nostalgic protocols have turned into security risks. @@ -778,14 +778,14 @@ network programming. For the rest of this major node, we will assume you work on a POSIX-style system that supports TCP/IP. If the previous example program does not run on your machine, it may help to replace the value assigned -to the variable 'daytime_server' with the name (or the IP address) of +to the variable ‘daytime_server’ with the name (or the IP address) of another server from the list mentioned above. Now you should see the date and time being printed by the program, otherwise you may have run -out of servers that support the 'daytime' service. +out of servers that support the ‘daytime’ service. - Try changing the service to 'chargen' or 'ftp'. This way, the + Try changing the service to ‘chargen’ or ‘ftp’. This way, the program connects to other services that should give you some response. -If you are curious, you should have a look at your '/etc/services' file. +If you are curious, you should have a look at your ‘/etc/services’ file. It could look like this: # /etc/services: @@ -821,27 +821,27 @@ It could look like this: usually support. If your GNU/Linux machine does not do so, it may be that these services are switched off in some startup script. Systems running some flavor of Microsoft Windows usually do _not_ support these -services. Nevertheless, it _is_ possible to do networking with 'gawk' +services. 
Nevertheless, it _is_ possible to do networking with ‘gawk’ on Microsoft Windows.(1) The first column of the file gives the name of the service, and the second column gives a unique number and the protocol that one can use to connect to this service. The rest of the -line is treated as a comment. You see that some services ('echo') +line is treated as a comment. You see that some services (‘echo’) support TCP as well as UDP. ---------- Footnotes ---------- (1) Microsoft preferred to ignore the TCP/IP family of protocols until 1995. Then came the rise of the Netscape browser as a landmark -"killer application." Microsoft added TCP/IP support and their own +“killer application.” Microsoft added TCP/IP support and their own browser to Microsoft Windows 95 at the last minute. They even back-ported their TCP/IP implementation to Microsoft Windows for Workgroups 3.11, but it was a rather rudimentary and half-hearted -implementation. Nevertheless, the equivalent of '/etc/services' resides -under 'C:\WINNT\system32\drivers\etc\services' on Microsoft Windows 2000 +implementation. Nevertheless, the equivalent of ‘/etc/services’ resides +under ‘C:\WINNT\system32\drivers\etc\services’ on Microsoft Windows 2000 and Microsoft Windows XP. On Microsoft Windows 7, 8 and 10 there is a -directory '%WinDir%\System32\Drivers\Etc' that holds the 'hosts' file +directory ‘%WinDir%\System32\Drivers\Etc’ that holds the ‘hosts’ file (https://support.microsoft.com/en-us/help/972034/how-to-reset-the-hosts-file-back-to-the-default) -and probably also a 'services' file +and probably also a ‘services’ file (https://www.ibm.com/support/knowledgecenter/SSRNYG_7.2.1/com.ibm.rational.synergy.install.win.doc/topics/sg_r_igw_services_file.html). @@ -852,8 +852,8 @@ File: gawkinet.info, Node: Interacting, Next: Setting Up, Prev: Troubleshooti The next program begins really interacting with a network service by printing something into the special file. 
It asks the so-called -'finger' service if a user of the machine is logged in. When testing -this program, try to change the variable 'finger_server' to some other +‘finger’ service if a user of the machine is logged in. When testing +this program, try to change the variable ‘finger_server’ to some other machine name in your local network: BEGIN { @@ -869,15 +869,15 @@ machine name in your local network: program repeatedly reads lines that come as a reply. When no more lines are available (because the service has closed the connection), the program also closes the connection. If you tried to replace -'finger_server' with some other server name, the script probably +‘finger_server’ with some other server name, the script probably reported being unable to open the connection, because most servers today no longer support this service. Try replacing the login name of -Professor Nace ('wnace') with another login name (like 'help'). You +Professor Nace (‘wnace’) with another login name (like ‘help’). You will receive a list of login names similar to the one you asked for. In the 1980s you could get a list of all users currently logged in by -asking for an empty string ('""'). +asking for an empty string (‘""’). - The final 'close()' call could be safely deleted from the above + The final ‘close()’ call could be safely deleted from the above script, because the operating system closes any open connection by default when a script reaches the end of execution. But, in order to avoid portability problems, it is best to always close connections @@ -885,9 +885,9 @@ explicitly. With the Linux kernel, for example, proper closing results in flushing of buffers. Letting the close happen by default may result in discarding buffers. - When looking at '/etc/services' you may have noticed that the -'daytime' service is also available with 'udp'. 
In the earlier -examples, change 'tcp' to 'udp' and try if the 'finger' and 'daytime' + When looking at ‘/etc/services’ you may have noticed that the +‘daytime’ service is also available with ‘udp’. In the earlier +examples, change ‘tcp’ to ‘udp’ and try if the ‘finger’ and ‘daytime’ clients still work as expected. They probably will not respond because a wise administrator switched off these services. But if they do, you may see the expected day and time message. The program then hangs, @@ -897,8 +897,8 @@ and UDP. When using UDP, neither party is automatically informed about the other closing the connection. Continuing to experiment this way reveals many other subtle differences between TCP and UDP. To avoid such trouble, you should always remember the advice Douglas E. Comer and -David Stevens give in Volume III of their series 'Internetworking With -TCP' (page 14): +David Stevens give in Volume III of their series ‘Internetworking With +TCP’ (page 14): When designing client-server applications, beginners are strongly advised to use TCP because it provides reliable, @@ -910,19 +910,19 @@ TCP' (page 14): This advice is actually quite dated and we hesitated to repeat it here. But we left it in because we are still observing beginners running into this pitfall. While this advice has aged quite well, some -other ideas from the 1980s have not. The 'finger' service may still be +other ideas from the 1980s have not. The ‘finger’ service may still be available in Microsoft Windows Server 2019 (https://docs.microsoft.com/en-us/windows-server/administration/windows-commands/finger), but it turned out to be a never-ending cause of trouble. First of all, it is now obvious that a server should never reveal personal data about its users to anonymous client software that connects over the wild wild -Internet. So every server on the Internet should reject 'finger' +Internet. 
So every server on the Internet should reject ‘finger’ requests (by disabling the port and by disabling the software serving this port). But things got even worse in 2020 when it turned out that -even the client software (the 'finger' command documented in the link +even the client software (the ‘finger’ command documented in the link above) is a security problem. A tool called DarkFinger (https://seclists.org/fulldisclosure/2020/Sep/30) allows one to leverage the -Microsoft Windows 'finger.exe' as a file downloader and help evade +Microsoft Windows ‘finger.exe’ as a file downloader and help evade network security devices. @@ -933,16 +933,16 @@ File: gawkinet.info, Node: Setting Up, Next: Email, Prev: Interacting, Up: U The preceding programs behaved as clients that connect to a server somewhere on the Internet and request a particular service. Now we set -up such a service to mimic the behavior of the 'daytime' service. Such +up such a service to mimic the behavior of the ‘daytime’ service. Such a server does not know in advance who is going to connect to it over the network. Therefore, we cannot insert a name for the host to connect to in our special file name. Start the following program in one window. Notice that the service -does not have the name 'daytime', but the number '8888'. From looking -at '/etc/services', you know that names like 'daytime' are just +does not have the name ‘daytime’, but the number ‘8888’. From looking +at ‘/etc/services’, you know that names like ‘daytime’ are just mnemonics for predetermined 16-bit integers. Only the system -administrator ('root') could enter our new service into '/etc/services' +administrator (‘root’) could enter our new service into ‘/etc/services’ with an appropriate name.
Also notice that the service name has to be entered into a different field of the special file name because we are setting up a server, not a client: @@ -955,34 +955,34 @@ setting up a server, not a client: Now open another window on the same machine. Copy the client program given as the first example (*note Establishing a TCP Connection: TCP Connecting.) to a new file and edit it, changing the variable -'daytime_server' to 'localhost' and the port name 'daytime' to '8888'. +‘daytime_server’ to ‘localhost’ and the port name ‘daytime’ to ‘8888’. Then start the modified client. You should get a reply like this: $ gawk -f awklib/eg/network/daytimeclient.awk - -| Sun Dec 27 17:33:57 CET 2020 - -| Sun Dec 27 17:33:57 CET 2020 + ⊣ Sun Dec 27 17:33:57 CET 2020 + ⊣ Sun Dec 27 17:33:57 CET 2020 Both programs explicitly close the connection. Now we will intentionally make a mistake to see what happens when the -name '8888' (the port) is already used by another service. Start the +name ‘8888’ (the port) is already used by another service. Start the server program in both windows. The first one works, but the second one complains that it could not open the connection. Each port on a single machine can only be used by one server program at a time. Now terminate -the server program and change the name '8888' to 'echo'. After +the server program and change the name ‘8888’ to ‘echo’. After restarting it, the server program does not run any more, and you know -why: there is already an 'echo' service running on your machine. But -even if this isn't true, you would not get your own 'echo' server +why: there is already an ‘echo’ service running on your machine. But +even if this isn’t true, you would not get your own ‘echo’ server running on a Unix machine, because the ports with numbers smaller than -1024 ('echo' is at port 7) are reserved for 'root'. On machines running +1024 (‘echo’ is at port 7) are reserved for ‘root’. 
On machines running some flavor of Microsoft Windows, there is no restriction that reserves -ports 1 to 1024 for a privileged user; hence, you can start an 'echo' +ports 1 to 1024 for a privileged user; hence, you can start an ‘echo’ server there. Even in later versions of Microsoft Windows, this -restriction of the Unix world seems to have never been adopted 'Does -windows(10/server-2016) have privileged ports?' +restriction of the Unix world seems to have never been adopted ‘Does +windows(10/server-2016) have privileged ports?’ (https://social.technet.microsoft.com/Forums/windowsserver/en-US/334f0770-eda9-475a-a27f-46b80ab7e872/does-windows10server2016-have-privileged-ports-?forum=ws2016). In Microsoft Windows it is the level of the firewall that handles port -access restrictions, not the level of the operating system's kernel. +access restrictions, not the level of the operating system’s kernel. Turning this short server program into something really useful is simple. Imagine a server that first reads a file name from the client @@ -1004,8 +1004,8 @@ contents of the named file across the net. The server-side processing could also be the execution of a command that is transmitted across the network. From this example, you can see how simple it is to open up a security hole on your machine. If you allow clients to connect to your -machine and execute arbitrary commands, anyone would be free to do 'rm --rf *'. +machine and execute arbitrary commands, anyone would be free to do ‘rm +-rf *’. The client side connects to port number 8888 on the server side and sends the name of the desired file to be sent across the same TCP @@ -1058,16 +1058,16 @@ the first email the server has in store: close(POPService) } - We redefine the record separators 'RS' and 'ORS' because the protocol + We redefine the record separators ‘RS’ and ‘ORS’ because the protocol (POP) requires CR-LF to separate lines. 
After identifying yourself to -the email service, the command 'retr 1' instructs the service to send +the email service, the command ‘retr 1’ instructs the service to send the first of all your email messages in line. If the service replies -with something other than '+OK', the program exits; maybe there is no +with something other than ‘+OK’, the program exits; maybe there is no email. Otherwise, the program first announces that it intends to finish -reading email, and then redefines 'RS' in order to read the entire email +reading email, and then redefines ‘RS’ in order to read the entire email as multiline input in one record. From the POP RFC, we know that the body of the email always ends with a single line containing a single -dot. The program looks for this using 'RS = "\r\n\\.\r\n"'. When it +dot. The program looks for this using ‘RS = "\r\n\\.\r\n"’. When it finds this sequence in the mail message, it quits. You can invoke this program as often as you like; it does not delete the message it reads, but instead leaves it on the server. @@ -1078,14 +1078,14 @@ but instead leaves it on the server. simple when email was young in the 20th century. These days, unencrypted plaintext authentication is usually disallowed on non-secure connections. Since encryption of network connections is not supported -in 'gawk', you should not use 'gawk' to write such scripts. We left +in ‘gawk’, you should not use ‘gawk’ to write such scripts. We left this node as it is because it demonstrates how application level protocols work in principle (a command being issued by the client followed by a reply coming back). Unfortunately, modern application level protocols are much more flexible in the sequence of actions. For example, modern POP3 servers may introduce themselves with an unprompted initial line that arrives before the initial command. Dealing with such -variance is not worth the effort in 'gawk'. +variance is not worth the effort in ‘gawk’. 
File: gawkinet.info, Node: Web page, Next: Primitive Service, Prev: Email, Up: Using Networking @@ -1105,7 +1105,7 @@ retrieving a web page. It uses the prehistoric syntax of HTTP 0.9, which almost all web servers still support. The most noticeable thing about it is that the program directs the request to the local proxy server whose name you insert in the special file name (which in turn -calls 'www.yahoo.com'): +calls ‘www.yahoo.com’): BEGIN { RS = ORS = "\r\n" @@ -1116,14 +1116,14 @@ calls 'www.yahoo.com'): close(HttpService) } - Again, lines are separated by a redefined 'RS' and 'ORS'. The 'GET' + Again, lines are separated by a redefined ‘RS’ and ‘ORS’. The ‘GET’ request that we send to the server is the only kind of HTTP request that existed when the web was created in the early 1990s. HTTP calls this -'GET' request a "method," which tells the service to transmit a web page +‘GET’ request a “method,” which tells the service to transmit a web page (here the home page of the Yahoo! search engine). Version 1.0 added -the request methods 'HEAD' and 'POST'. The current version of HTTP is -1.1,(1)(2) and knows the additional request methods 'OPTIONS', 'PUT', -'DELETE', and 'TRACE'. You can fill in any valid web address, and the +the request methods ‘HEAD’ and ‘POST’. The current version of HTTP is +1.1,(1)(2) and knows the additional request methods ‘OPTIONS’, ‘PUT’, +‘DELETE’, and ‘TRACE’. You can fill in any valid web address, and the program prints the HTML code of that page to your screen. Notice the similarity between the responses of the POP and HTTP @@ -1132,7 +1132,7 @@ and then you get the body of the page in HTML. The lines of the headers also have the same form as in POP. There is the name of a parameter, then a colon, and finally the value of that parameter. 
- Images ('.png' or '.gif' files) can also be retrieved this way, but + Images (‘.png’ or ‘.gif’ files) can also be retrieved this way, but then you get binary data that should be redirected into a file. Another application is calling a CGI (Common Gateway Interface) script on some server. CGI scripts are used when the contents of a web page are not @@ -1154,11 +1154,11 @@ obsolete by RFC 2616, an update without any substantial changes. (2) Version 2.0 of HTTP (https://en.wikipedia.org/wiki/HTTP/2) was defined in RFC7540 (https://tools.ietf.org/html/rfc7540) and was derived -from Google's SPDY (https://en.wikipedia.org/wiki/SPDY) protocol. It is +from Google’s SPDY (https://en.wikipedia.org/wiki/SPDY) protocol. It is said to be widely supported. As of 2020 the most popular web sites still identify themselves as supporting HTTP/1.1. Version 3.0 of HTTP (https://en.wikipedia.org/wiki/HTTP/3) is still a draft and was derived -from Google's QUIC (https://en.wikipedia.org/wiki/QUIC) protocol. +from Google’s QUIC (https://en.wikipedia.org/wiki/QUIC) protocol. File: gawkinet.info, Node: Primitive Service, Next: Interacting Service, Prev: Web page, Up: Using Networking @@ -1167,12 +1167,12 @@ File: gawkinet.info, Node: Primitive Service, Next: Interacting Service, Prev =========================== Now we know enough about HTTP to set up a primitive web service that -just says '"Hello, world"' when someone connects to it with a browser. +just says ‘"Hello, world"’ when someone connects to it with a browser. Compared to the situation in the preceding node, our program changes the role. It tries to behave just like the server we have observed. Since we are setting up a server here, we have to insert the port number in -the 'localport' field of the special file name. The other two fields -(HOSTNAME and REMOTEPORT) have to contain a '0' because we do not know +the ‘localport’ field of the special file name. 
The other two fields +(HOSTNAME and REMOTEPORT) have to contain a ‘0’ because we do not know in advance which host will connect to our service. In the early 1990s, all a server had to do was send an HTML document @@ -1191,7 +1191,7 @@ The steps are as follows: bytes will be sent. The header is terminated as usual with an empty line. - 3. Send the '"Hello, world"' body in HTML. The useless 'while' loop + 3. Send the ‘"Hello, world"’ body in HTML. The useless ‘while’ loop swallows the request of the browser. We could actually omit the loop, and on most machines the program would still work. First, start the following program: @@ -1215,7 +1215,7 @@ The steps are as follows: point to <http://localhost:8080> (the browser needs to know on which port our server is listening for requests). If this does not work, the browser probably tries to connect to a proxy server that does not know -your machine. If so, change the browser's configuration so that the +your machine. If so, change the browser’s configuration so that the browser does not try to use a proxy to connect to your machine. @@ -1233,13 +1233,13 @@ Applications and Techniques::. * CGI Lib:: A simple CGI library. Setting up a web service that allows user interaction is more -difficult and shows us the limits of network access in 'gawk'. In this -node, we develop a main program (a 'BEGIN' pattern and its action) that +difficult and shows us the limits of network access in ‘gawk’. In this +node, we develop a main program (a ‘BEGIN’ pattern and its action) that will become the core of event-driven execution controlled by a graphical user interface (GUI). Each HTTP event that the user triggers by some action within the browser is received in this central procedure. 
Parameters and menu choices are extracted from this request, and an -appropriate measure is taken according to the user's choice: +appropriate measure is taken according to the user’s choice: BEGIN { if (MyHost == "") { @@ -1289,7 +1289,7 @@ appropriate measure is taken according to the user's choice: This web server presents menu choices in the form of HTML links. Therefore, it has to tell the browser the name of the host it is residing on. When starting the server, the user may supply the name of -the host from the command line with 'gawk -v MyHost="Rumpelstilzchen"'. +the host from the command line with ‘gawk -v MyHost="Rumpelstilzchen"’. If the user does not do this, the server looks up the name of the host it is running on for later use as a web address in HTML documents. The same applies to the port number. These values are inserted later into @@ -1297,7 +1297,7 @@ the HTML content of the web pages to refer to the home system. Each server that is built around this core has to initialize some application-dependent variables (such as the default home page) in a -function 'SetUpServer()', which is called immediately before entering +function ‘SetUpServer()’, which is called immediately before entering the infinite loop of the server. For now, we will write an instance that initiates a trivial interaction. With this home page, the client user can click on two possible choices, and receive the current date @@ -1316,13 +1316,13 @@ either in human-readable format or in seconds since 1970: On the first run through the main loop, the default line terminators are set and the default home page is copied to the actual home page. -Since this is the first run, 'GETARG["Method"]' is not initialized yet, +Since this is the first run, ‘GETARG["Method"]’ is not initialized yet, hence the case selection over the method does nothing. Now that the home page is initialized, the server can start communicating to a client browser. 
It does so by printing the HTTP header into the network connection -('print ... |& HttpService'). This command blocks execution of the +(‘print ... |& HttpService’). This command blocks execution of the server script until a client connects. If you compare this server script with the primitive one we wrote @@ -1336,15 +1336,15 @@ always displaying the same time of day although time advances each second. Having supplied the initial home page to the browser with a valid -document stored in the parameter 'Prompt', it closes the connection and +document stored in the parameter ‘Prompt’, it closes the connection and waits for the next request. When the request comes, a log line is printed that allows us to see which request the server receives. The -final step in the loop is to call the function 'CGI_setup()', which +final step in the loop is to call the function ‘CGI_setup()’, which reads all the lines of the request (coming from the browser), processes -them, and stores the transmitted parameters in the array 'PARAM'. The +them, and stores the transmitted parameters in the array ‘PARAM’. The complete text of these application-independent functions can be found in *note A Simple CGI Library: CGI Lib. For now, we use a simplified -version of 'CGI_setup()': +version of ‘CGI_setup()’: function CGI_setup( method, uri, version, i) { delete GETARG; delete MENU; delete PARAM @@ -1370,26 +1370,26 @@ version of 'CGI_setup()': of request parameters. The rest of the function serves the purpose of filling the global parameters with the extracted new values. To accomplish this, the name of the requested resource is split into parts -and stored for later evaluation. If the request contains a '?', then +and stored for later evaluation. If the request contains a ‘?’, then the request has CGI variables seamlessly appended to the web address. -Everything in front of the '?' is split up into menu items, and -everything behind the '?' 
is a list of 'VARIABLE=VALUE' pairs (separated -by '&') that also need splitting. This way, CGI variables are isolated +Everything in front of the ‘?’ is split up into menu items, and +everything behind the ‘?’ is a list of ‘VARIABLE=VALUE’ pairs (separated +by ‘&’) that also need splitting. This way, CGI variables are isolated and stored. This procedure lacks recognition of special characters that are transmitted in coded form(1). Here, any optional request header and body parts are ignored. We do not need header parameters and the request body. However, when refining our approach or working with the -'POST' and 'PUT' methods, reading the header and body becomes +‘POST’ and ‘PUT’ methods, reading the header and body becomes inevitable. Header parameters should then be stored in a global array as well as the body. On each subsequent run through the main loop, one request from a -browser is received, evaluated, and answered according to the user's +browser is received, evaluated, and answered according to the user’s choice. This can be done by letting the value of the HTTP method guide -the main loop into execution of the procedure 'HandleGET()', which -evaluates the user's choice. In this case, we have only one +the main loop into execution of the procedure ‘HandleGET()’, which +evaluates the user’s choice. In this case, we have only one hierarchical level of menus, but in the general case, menus are nested. -The menu choices at each level are separated by '/', just as in file +The menu choices at each level are separated by ‘/’, just as in file names. Notice how simple it is to construct menus of arbitrary depth: function HandleGET() { @@ -1402,18 +1402,18 @@ names. Notice how simple it is to construct menus of arbitrary depth: The disadvantage of this approach is that our server is slow and can handle only one request at a time. Its main advantage, however, is that -the server consists of just one 'gawk' program. 
No need for installing -an 'httpd', and no need for static separate HTML files, CGI scripts, or -'root' privileges. This is rapid prototyping. This program can be +the server consists of just one ‘gawk’ program. No need for installing +an ‘httpd’, and no need for static separate HTML files, CGI scripts, or +‘root’ privileges. This is rapid prototyping. This program can be started on the same host that runs your browser. Then let your browser point to <http://localhost:8080>. It is also possible to include images into the HTML pages. Most -browsers support the not very well-known '.xbm' format, which may +browsers support the not very well-known ‘.xbm’ format, which may contain only monochrome pictures but is an ASCII format. Binary images are possible but not so easy to handle. Another way of including images is to generate them with a tool such as GNUPlot, by calling the tool -with the 'system()' function or through a pipe. +with the ‘system()’ function or through a pipe. ---------- Footnotes ---------- @@ -1426,27 +1426,27 @@ File: gawkinet.info, Node: CGI Lib, Prev: Interacting Service, Up: Interactin -------------------------- HTTP is like being married: you have to be able to handle whatever - you're given, while being very careful what you send back. - -- _Phil Smith III, + you’re given, while being very careful what you send back. + — _Phil Smith III, <http://www.netfunny.com/rhf/jokes/99/Mar/http.html>_ In *note A Web Service with Interaction: Interacting Service, we saw -the function 'CGI_setup()' as part of the web server "core logic" +the function ‘CGI_setup()’ as part of the web server “core logic” framework. The code presented there handles almost everything necessary -for CGI requests. One thing it doesn't do is handle encoded characters -in the requests. For example, an '&' is encoded as a percent sign -followed by the hexadecimal value: '%26'. These encoded values should +for CGI requests. 
One thing it doesn’t do is handle encoded characters +in the requests. For example, an ‘&’ is encoded as a percent sign +followed by the hexadecimal value: ‘%26’. These encoded values should be decoded. Following is a simple library to perform these tasks. This code is used for all web server examples throughout the rest of this Info file. If you want to use it for your own web server, store the -source code into a file named 'inetlib.awk'. Then you can include these +source code into a file named ‘inetlib.awk’. Then you can include these functions into your code by placing the following statement into your program (on the first line of your script): @include inetlib.awk But beware, this mechanism is only possible if you invoke your web -server script with 'igawk' instead of the usual 'awk' or 'gawk'. Here +server script with ‘igawk’ instead of the usual ‘awk’ or ‘gawk’. Here is the code: # CGI Library and core of a web server @@ -1531,10 +1531,10 @@ is the code: MENU[i] = _CGI_decode(MENU[i]) } - This isolates details in a single function, 'CGI_setup()'. Decoding + This isolates details in a single function, ‘CGI_setup()’. Decoding of encoded characters is pushed off to a helper function, -'_CGI_decode()'. The use of the leading underscore ('_') in the -function name is intended to indicate that it is an "internal" function, +‘_CGI_decode()’. The use of the leading underscore (‘_’) in the +function name is intended to indicate that it is an “internal” function, although there is nothing to enforce this: function _CGI_decode(str, hexdigs, i, pre, code1, code2, @@ -1567,10 +1567,10 @@ although there is nothing to enforce this: This works by splitting the string apart around an encoded character. The two digits are converted to lowercase characters and looked up in a -string of hex digits. Note that '0' is not in the string on purpose; -'index()' returns zero when it's not found, automatically giving the +string of hex digits. 
Note that ‘0’ is not in the string on purpose; +‘index()’ returns zero when it’s not found, automatically giving the correct value! Once the hexadecimal value is converted from characters -in a string into a numerical value, 'sprintf()' converts the value back +in a string into a numerical value, ‘sprintf()’ converts the value back into a real character. The following is a simple test harness for the above functions: @@ -1590,21 +1590,21 @@ above functions: And this is the result when we run it: $ gawk -f testserv.awk - -| MENU["4"] = www.gnu.org - -| MENU["5"] = cgi-bin - -| MENU["6"] = foo - -| MENU["1"] = http - -| MENU["2"] = - -| MENU["3"] = - -| PARAM["1"] = p1=stuff - -| PARAM["2"] = p2=stuff&junk - -| PARAM["3"] = percent=a % sign - -| GETARG["p1"] = stuff - -| GETARG["percent"] = a % sign - -| GETARG["p2"] = stuff&junk - -| GETARG["Method"] = GET - -| GETARG["Version"] = 1.0 - -| GETARG["URI"] = http://www.gnu.org/cgi-bin/foo?p1=stuff& + ⊣ MENU["4"] = www.gnu.org + ⊣ MENU["5"] = cgi-bin + ⊣ MENU["6"] = foo + ⊣ MENU["1"] = http + ⊣ MENU["2"] = + ⊣ MENU["3"] = + ⊣ PARAM["1"] = p1=stuff + ⊣ PARAM["2"] = p2=stuff&junk + ⊣ PARAM["3"] = percent=a % sign + ⊣ GETARG["p1"] = stuff + ⊣ GETARG["percent"] = a % sign + ⊣ GETARG["p2"] = stuff&junk + ⊣ GETARG["Method"] = GET + ⊣ GETARG["Version"] = 1.0 + ⊣ GETARG["URI"] = http://www.gnu.org/cgi-bin/foo?p1=stuff& p2=stuff%26junk&percent=a %25 sign @@ -1615,7 +1615,7 @@ File: gawkinet.info, Node: Simple Server, Next: Caveats, Prev: Interacting Se In the preceding node, we built the core logic for event-driven GUIs. In this node, we finally extend the core to a real application. No one -would actually write a commercial web server in 'gawk', but it is +would actually write a commercial web server in ‘gawk’, but it is instructive to see that it is feasible in principle. 
The application is ELIZA, the famous program by Joseph Weizenbaum @@ -1646,19 +1646,19 @@ and append the following code: TopFooter = "</BODY></HTML>" } - 'SetUpServer()' is similar to the previous example, except for -calling another function, 'SetUpEliza()'. This approach can be used to + ‘SetUpServer()’ is similar to the previous example, except for +calling another function, ‘SetUpEliza()’. This approach can be used to implement other kinds of servers. The only changes needed to do so are -hidden in the functions 'SetUpServer()' and 'HandleGET()'. Perhaps it -might be necessary to implement other HTTP methods. The 'igawk' program -that comes with 'gawk' may be useful for this process. +hidden in the functions ‘SetUpServer()’ and ‘HandleGET()’. Perhaps it +might be necessary to implement other HTTP methods. The ‘igawk’ program +that comes with ‘gawk’ may be useful for this process. When extending this example to a complete application, the first -thing to do is to implement the function 'SetUpServer()' to initialize +thing to do is to implement the function ‘SetUpServer()’ to initialize the HTML pages and some variables. These initializations determine the way your HTML pages look (colors, titles, menu items, etc.). - The function 'HandleGET()' is a nested case selection that decides + The function ‘HandleGET()’ is a nested case selection that decides which page the user wants to see next. Each nesting level refers to a menu level of the GUI. Each case implements a certain action of the menu. At the deepest level of case selection, the handler essentially @@ -1699,7 +1699,7 @@ Initially the user does not say anything; then ELIZA resets its money counter and asks the user to tell what comes to mind open-heartedly. The subsequent answers are converted to uppercase characters and stored for later comparison. ELIZA presents the bill when being confronted -with a sentence that contains the phrase "shut up." 
Otherwise, it looks +with a sentence that contains the phrase “shut up.” Otherwise, it looks for keywords in the sentence, conjugates the rest of the sentence, remembers the keyword for later use, and finally selects an answer from the set of possible answers: @@ -1747,9 +1747,9 @@ the set of possible answers: return answer } - In the long but simple function 'SetUpEliza()', you can see tables -for conjugation, keywords, and answers.(1) The associative array 'k' -contains indices into the array of answers 'r'. To choose an answer, + In the long but simple function ‘SetUpEliza()’, you can see tables +for conjugation, keywords, and answers.(1) The associative array ‘k’ +contains indices into the array of answers ‘r’. To choose an answer, ELIZA just picks an index randomly: function SetUpEliza() { @@ -1786,8 +1786,8 @@ ELIZA just picks an index randomly: } Some interesting remarks and details (including the original source -code of ELIZA) are found on Mark Humphrys's home page 'How my program -passed the Turing Test' (https://computing.dcu.ie/~humphrys/eliza.html). +code of ELIZA) are found on Mark Humphrys’s home page ‘How my program +passed the Turing Test’ (https://computing.dcu.ie/~humphrys/eliza.html). Wikipedia provides much background information about ELIZA (https://en.wikipedia.org/wiki/ELIZA), including the original design of the software and its early implementations. @@ -1795,7 +1795,7 @@ the software and its early implementations. ---------- Footnotes ---------- (1) The version shown here is abbreviated. The full version comes -with the 'gawk' distribution. +with the ‘gawk’ distribution. File: gawkinet.info, Node: Caveats, Next: Challenges, Prev: Simple Server, Up: Using Networking @@ -1809,19 +1809,19 @@ The behavior of a networked application sometimes looks noncausal because it is not reproducible in a strong sense. 
Whether a network application works or not sometimes depends on the following: - * How crowded the underlying network is + • How crowded the underlying network is - * If the party at the other end is running or not + • If the party at the other end is running or not - * The state of the party at the other end + • The state of the party at the other end The most difficult problems for a beginner arise from the hidden -states of the underlying network. After closing a TCP connection, it's +states of the underlying network. After closing a TCP connection, it’s often necessary to wait a short while before reopening the connection. Even more difficult is the establishment of a connection that previously -ended with a "broken pipe." Those connections have to "time out" for a +ended with a “broken pipe.” Those connections have to “time out” for a minute or so before they can reopen. Check this with the command -'netstat -a', which provides a list of still-active connections. +‘netstat -a’, which provides a list of still-active connections. File: gawkinet.info, Node: Challenges, Prev: Caveats, Up: Using Networking @@ -1835,7 +1835,7 @@ Loebner Prize is the first formal instantiation of a Turing Test. Hugh Loebner agreed with The Cambridge Center for Behavioral Studies to underwrite a contest designed to implement the Turing Test. Dr. Loebner pledged a Grand Prize of $100,000 for the first computer whose responses -were indistinguishable from a human's. Each year an annual prize of +were indistinguishable from a human’s. Each year an annual prize of $2000 and a bronze medal is awarded to the _most_ human computer. The winner of the annual contest is the best entry relative to other entries that year, irrespective of how good it is in an absolute sense. Here is @@ -1887,20 +1887,20 @@ behave so much like a human being that it can win this prize. It is quite common to let these programs talk to each other via network connections. 
But during the competition itself, the program and its computer have to be present at the place the competition is held. We -all would love to see a 'gawk' program win in such an event. Maybe it +all would love to see a ‘gawk’ program win in such an event. Maybe it is up to you to accomplish this? Some other ideas for useful networked applications: - * Read the file 'doc/awkforai.txt' in earlier 'gawk' + • Read the file ‘doc/awkforai.txt’ in earlier ‘gawk’ distributions.(1) It was written by Ronald P. Loui (at the time, Associate Professor of Computer Science, at Washington University in St. Louis, <loui@ai.wustl.edu>) and summarizes why he taught - 'gawk' to students of Artificial Intelligence. Here are some + ‘gawk’ to students of Artificial Intelligence. Here are some passages from the text: The GAWK manual can be consumed in a single lab session and the language can be mastered by the next morning by the - average student. GAWK's automatic initialization, implicit + average student. GAWK’s automatic initialization, implicit coercion, I/O support and lack of pointers forgive many of the mistakes that young programmers are likely to make. Those who have seen C but not mastered it are happy to see that GAWK @@ -1910,17 +1910,17 @@ is up to you to accomplish this? There are further simple answers. Probably the best is the fact that increasingly, undergraduate AI programming is involving the Web. Oren Etzioni (University of Washington, - Seattle) has for a while been arguing that the "softbot" is - replacing the mechanical engineers' robot as the most + Seattle) has for a while been arguing that the “softbot” is + replacing the mechanical engineers’ robot as the most glamorous AI testbed. If the artifact whose behavior needs to be controlled in an intelligent way is the software agent, then a language that is well-suited to controlling the software environment is the appropriate language. That would imply a scripting language. 
If the robot is KAREL, then the - right language is "turn left; turn right." If the robot is + right language is “turn left; turn right.” If the robot is Netscape, then the right language is something that can - generate 'netscape -remote - 'openURL(http://cs.wustl.edu/~loui)'' with elan. + generate ‘netscape -remote + 'openURL(http://cs.wustl.edu/~loui)'’ with elan. ... AI programming requires high-level thinking. There have always been a few gifted programmers who can write high-level @@ -1934,17 +1934,17 @@ is up to you to accomplish this? strings. A language that provides the best support for string processing in the end provides the best support for logic, for the exploration of various logics, and for most forms of - symbolic processing that AI might choose to call "reasoning" - instead of "logic." The implication is that PROLOG, which + symbolic processing that AI might choose to call “reasoning” + instead of “logic.” The implication is that PROLOG, which saves the AI programmer from having to write a unifier, saves perhaps two dozen lines of GAWK code at the expense of strongly biasing the logic and representational expressiveness of any approach. - Now that 'gawk' itself can connect to the Internet, it should be + Now that ‘gawk’ itself can connect to the Internet, it should be obvious that it is suitable for writing intelligent web agents. - * 'awk' is strong at pattern recognition and string processing. So, + • ‘awk’ is strong at pattern recognition and string processing. So, it is well suited to the classic problem of language translation. A first try could be a program that knows the 100 most frequent English words and their counterparts in German or French. The @@ -1955,9 +1955,9 @@ is up to you to accomplish this? in return. As soon as this works, more effort can be spent on a real translation program. - * Another dialogue-oriented application (on the verge of ridicule) is - the email "support service." 
Troubled customers write an email to - an automatic 'gawk' service that reads the email. It looks for + • Another dialogue-oriented application (on the verge of ridicule) is + the email “support service.” Troubled customers write an email to + an automatic ‘gawk’ service that reads the email. It looks for keywords in the mail and assembles a reply email accordingly. By carefully investigating the email header, and repeating these keywords through the reply email, it is rather simple to give the @@ -1968,7 +1968,7 @@ is up to you to accomplish this? ---------- Footnotes ---------- - (1) The file is no longer distributed with 'gawk', since the + (1) The file is no longer distributed with ‘gawk’, since the copyright on the file is not clear. @@ -1981,24 +1981,24 @@ In this major node, we look at a number of self-contained scripts, with an emphasis on concise networking. Along the way, we work towards creating building blocks that encapsulate often-needed functions of the networking world, show new techniques that broaden the scope of problems -that can be solved with 'gawk', and explore leading edge technology that +that can be solved with ‘gawk’, and explore leading edge technology that may shape the future of networking. We often refer to the site-independent core of the server that we built in *note A Simple Web Server: Simple Server. When building new and nontrivial servers, we always copy this building block and append -new instances of the two functions 'SetUpServer()' and 'HandleGET()'. +new instances of the two functions ‘SetUpServer()’ and ‘HandleGET()’. This makes a lot of sense, since this scheme of event-driven -execution provides 'gawk' with an interface to the most widely accepted -standard for GUIs: the web browser. Now, 'gawk' can rival even Tcl/Tk. +execution provides ‘gawk’ with an interface to the most widely accepted +standard for GUIs: the web browser. Now, ‘gawk’ can rival even Tcl/Tk. - Tcl and 'gawk' have much in common. 
Both are simple scripting + Tcl and ‘gawk’ have much in common. Both are simple scripting languages that allow us to quickly solve problems with short programs. -But Tcl has Tk on top of it, and 'gawk' had nothing comparable up to +But Tcl has Tk on top of it, and ‘gawk’ had nothing comparable up to now. While Tcl needs a large and ever-changing library (Tk, which was -originally bound to the X Window System), 'gawk' needs just the -networking interface and some kind of browser on the client's side. +originally bound to the X Window System), ‘gawk’ needs just the +networking interface and some kind of browser on the client’s side. Besides better portability, the most important advantage of this approach (embracing well-established standards such as HTTP and HTML) is that _we do not need to change the language_. We let others do the work @@ -2024,20 +2024,20 @@ File: gawkinet.info, Node: PANIC, Next: GETURL, Prev: Some Applications and T 3.1 PANIC: An Emergency Web Server ================================== -At first glance, the '"Hello, world"' example in *note A Primitive Web +At first glance, the ‘"Hello, world"’ example in *note A Primitive Web Service: Primitive Service, seems useless. By adding just a few lines, we can turn it into something useful. The PANIC program tells everyone who connects that the local site is not working. When a web server breaks down, it makes a difference if -customers get a strange "network unreachable" message, or a short +customers get a strange “network unreachable” message, or a short message telling them that the server has a problem. In such an emergency, the hard disk and everything on it (including the regular web service) may be unavailable. Rebooting the web server off a USB drive makes sense in this setting. To use the PANIC program as an emergency web server, all you need are -the 'gawk' executable and the program below on a USB drive. By default, +the ‘gawk’ executable and the program below on a USB drive. 
By default, it connects to port 8080. A different value may be supplied on the command line: @@ -2070,7 +2070,7 @@ GETURL is a versatile building block for shell scripts that need to retrieve files from the Internet. It takes a web address as a command-line parameter and tries to retrieve the contents of this address. The contents are printed to standard output, while the header -is printed to '/dev/stderr'. A surrounding shell script could analyze +is printed to ‘/dev/stderr’. A surrounding shell script could analyze the contents and extract the text or the links. An ASCII browser could be written around GETURL. But more interestingly, web robots are straightforward to write on top of GETURL. On the Internet, you can find @@ -2080,10 +2080,10 @@ usually much more complex internally and at least 10 times as big. At first, GETURL checks if it was called with exactly one web address. Then, it checks if the user chose to use a special proxy server whose name is handed over in a variable. By default, it is -assumed that the local machine serves as proxy. GETURL uses the 'GET' +assumed that the local machine serves as proxy. GETURL uses the ‘GET’ method by default to access the web page. By handing over the name of a -different method (such as 'HEAD'), it is possible to choose a different -behavior. With the 'HEAD' method, the user does not receive the body of +different method (such as ‘HEAD’), it is possible to choose a different +behavior. With the ‘HEAD’ method, the user does not receive the body of the page content, but does receive the header: BEGIN { @@ -2114,7 +2114,7 @@ the page content, but does receive the header: This program can be changed as needed, but be careful with the last lines. Make sure transmission of binary data is not corrupted by additional line breaks. Even as it is now, the byte sequence -'"\r\n\r\n"' would disappear if it were contained in binary data. Don't +‘"\r\n\r\n"’ would disappear if it were contained in binary data. 
Don’t get caught in a trap when trying a quick fix on this one. @@ -2131,27 +2131,27 @@ GNU/Linux in embedded PCs. These systems are small and usually do not have a keyboard or a display. Therefore it is difficult to set up their configuration. There are several widespread ways to set them up: - * DIP switches + • DIP switches - * Read Only Memories such as EPROMs + • Read Only Memories such as EPROMs - * Serial lines or some kind of keyboard + • Serial lines or some kind of keyboard - * Network connections via 'telnet' or SNMP + • Network connections via ‘telnet’ or SNMP - * HTTP connections with HTML GUIs + • HTTP connections with HTML GUIs In this node, we look at a solution that uses HTTP connections to control variables of an embedded system that are stored in a file. Since embedded systems have tight limits on resources like memory, it is difficult to employ advanced techniques such as SNMP and HTTP servers. -'gawk' fits in quite nicely with its single executable which needs just +‘gawk’ fits in quite nicely with its single executable which needs just a short script to start working. The following program stores the variables in a file, and a concurrent process in the embedded system may read the file. The program uses the site-independent part of the simple web server that we developed in *note A Web Service with Interaction: Interacting Service. As mentioned there, all we have to do is to write -two new procedures 'SetUpServer()' and 'HandleGET()': +two new procedures ‘SetUpServer()’ and ‘HandleGET()’: function SetUpServer() { TopHeader = "<HTML><title>Remote Configuration</title>" @@ -2168,18 +2168,18 @@ two new procedures 'SetUpServer()' and 'HandleGET()': if (ConfigFile == "") ConfigFile = "config.asc" } - The function 'SetUpServer()' initializes the top level HTML texts as + The function ‘SetUpServer()’ initializes the top level HTML texts as usual. It also initializes the name of the file that contains the configuration parameters and their values. 
In case the user supplies a name from the command line, that name is used. The file is expected to contain one parameter per line, with the name of the parameter in column one and the value in column two. - The function 'HandleGET()' reflects the structure of the menu tree as + The function ‘HandleGET()’ reflects the structure of the menu tree as usual. The first menu choice tells the user what this is all about. The second choice reads the configuration file line by line and stores the parameters and their values. Notice that the record separator for -this file is '"\n"', in contrast to the record separator for HTTP. The +this file is ‘"\n"’, in contrast to the record separator for HTTP. The third menu choice builds an HTML table to show the contents of the configuration file just read. The fourth choice does the real work of changing parameters, and the last one just saves the configuration into @@ -2244,15 +2244,15 @@ bookmark file with pointers to interesting web sites. It is impossible to regularly check by hand if any of these sites have changed. A program is needed to automatically look at the headers of web pages and tell which ones have changed. URLCHK does the comparison after using -GETURL with the 'HEAD' method to retrieve the header. +GETURL with the ‘HEAD’ method to retrieve the header. Like GETURL, this program first checks that it is called with exactly one command-line parameter. URLCHK also takes the same command-line -variables 'Proxy' and 'ProxyPort' as GETURL, because these variables are +variables ‘Proxy’ and ‘ProxyPort’ as GETURL, because these variables are handed over to GETURL for each URL that gets checked. The one and only parameter is the name of a file that contains one line for each URL. In the first column, we find the URL, and the second and third columns hold -the length of the URL's body when checked for the two last times. Now, +the length of the URL’s body when checked for the two last times. Now, we follow this plan: 1. 
Read the URLs from the file and remember their most recent lengths @@ -2301,11 +2301,11 @@ those lines that differ in their second and third columns: Another thing that may look strange is the way GETURL is called. Before calling GETURL, we have to check if the proxy variables need to be passed on. If so, we prepare strings that will become part of the -command line later. In 'GetHeader', we store these strings together +command line later. In ‘GetHeader’, we store these strings together with the longest part of the command line. Later, in the loop over the -URLs, 'GetHeader' is appended with the URL and a redirection operator to -form the command that reads the URL's header over the Internet. GETURL -always sends the headers to '/dev/stderr'. That is the reason why we +URLs, ‘GetHeader’ is appended with the URL and a redirection operator to +form the command that reads the URL’s header over the Internet. GETURL +always sends the headers to ‘/dev/stderr’. That is the reason why we need the redirection operator to have the header piped in. This program is not perfect because it assumes that changing URLs @@ -2335,20 +2335,20 @@ the Bourne shell: Notice that the regular expression for URLs is rather crude. A precise regular expression is much more complex. But this one works rather well. One problem is that it is unable to find internal links of -an HTML document. Another problem is that 'ftp', 'telnet', 'news', -'mailto', and other kinds of links are missing in the regular +an HTML document. Another problem is that ‘ftp’, ‘telnet’, ‘news’, +‘mailto’, and other kinds of links are missing in the regular expression. However, it is straightforward to add them, if doing so is necessary for other tasks. This program reads an HTML file and prints all the HTTP links that it -finds. It relies on 'gawk''s ability to use regular expressions as the -record separator. With 'RS' set to a regular expression that matches +finds. 
It relies on ‘gawk’’s ability to use regular expressions as the +record separator. With ‘RS’ set to a regular expression that matches links, the second action is executed each time a non-empty link is -found. We can find the matching link itself in 'RT'. +found. We can find the matching link itself in ‘RT’. - The action could use the 'system()' function to let another GETURL + The action could use the ‘system()’ function to let another GETURL retrieve the page, but here we use a different approach. This simple -program prints shell commands that can be piped into 'sh' for execution. +program prints shell commands that can be piped into ‘sh’ for execution. This way it is possible to first extract the links, wrap shell commands around them, and pipe all the shell commands into a file. After editing the file, execution of the file retrieves only those files that we @@ -2358,10 +2358,10 @@ pages like this: gawk -f geturl.awk http://www.suse.de | gawk -f webgrab.awk | sh After this, you will find the contents of all referenced documents in -files named 'doc*.html' even if they do not contain HTML code. The most +files named ‘doc*.html’ even if they do not contain HTML code. The most annoying thing is that we always have to pass the proxy to GETURL. If you do not like to see the headers of the web pages appear on the -screen, you can redirect them to '/dev/null'. Watching the headers +screen, you can redirect them to ‘/dev/null’. Watching the headers appear can be quite interesting, because it reveals interesting details such as which web server the companies use. Now, it is clear how the clever marketing people use web robots to determine the market shares of @@ -2371,11 +2371,11 @@ Microsoft and Netscape in the web server market. firewall. After attaching a browser to port 80, we usually catch a glimpse of the bright side of the server (its home page). 
With a tool like GETURL at hand, we are able to discover some of the more concealed -or even "indecent" services (i.e., lacking conformity to standards of +or even “indecent” services (i.e., lacking conformity to standards of quality). It can be exciting to see the fancy CGI scripts that lie there, revealing the inner workings of the server, ready to be called: - * With a command such as: + • With a command such as: gawk -f geturl.awk http://any.host.on.the.net/cgi-bin/ @@ -2386,18 +2386,18 @@ there, revealing the inner workings of the server, ready to be called: If there are subdirectories with configuration data of the web server, this can also be quite interesting to read. - * The well-known Apache web server usually has its CGI files in the - directory '/cgi-bin'. There you can often find the scripts - 'test-cgi' and 'printenv'. Both tell you some things about the + • The well-known Apache web server usually has its CGI files in the + directory ‘/cgi-bin’. There you can often find the scripts + ‘test-cgi’ and ‘printenv’. Both tell you some things about the current connection and the installation of the web server. Just call: gawk -f geturl.awk http://any.host.on.the.net/cgi-bin/test-cgi gawk -f geturl.awk http://any.host.on.the.net/cgi-bin/printenv - * Sometimes it is even possible to retrieve system files like the web - server's log file--possibly containing customer data--or even the - file '/etc/passwd'. (We don't recommend this!) + • Sometimes it is even possible to retrieve system files like the web + server’s log file—possibly containing customer data—or even the + file ‘/etc/passwd’. (We don’t recommend this!) *Caution:* Although this may sound funny or simply irrelevant, we are talking about severe security holes. 
Try to explore your own system @@ -2410,7 +2410,7 @@ File: gawkinet.info, Node: STATIST, Next: MAZE, Prev: WEBGRAB, Up: Some Appl 3.6 STATIST: Graphing a Statistical Distribution ================================================ - + | : : sample 1 :::: | | : : sample 2 ---- | 0.3 |- : :_ -| @@ -2435,28 +2435,28 @@ File: gawkinet.info, Node: STATIST, Next: MAZE, Prev: WEBGRAB, Up: Some Appl -10 5 0 5 10" -In the HTTP server examples we've shown thus far, we never present an +In the HTTP server examples we’ve shown thus far, we never present an image to the browser and its user. Presenting images is one task. Generating images that reflect some user input and presenting these dynamically generated images is another. In this node, we use GNUPlot -for generating '.png', '.ps', or '.gif' files.(1) +for generating ‘.png’, ‘.ps’, or ‘.gif’ files.(1) The program we develop takes the statistical parameters of two samples and computes the t-test statistics. As a result, we get the probabilities that the means and the variances of both samples are the same. In order to let the user check plausibility, the program presents an image of the distributions. The statistical computation follows -'Numerical Recipes in C: The Art of Scientific Computing' by William H. +‘Numerical Recipes in C: The Art of Scientific Computing’ by William H. Press, Saul A. Teukolsky, William T. Vetterling, and Brian P. Flannery. -Since 'gawk' does not have a built-in function for the computation of -the beta function, we use the 'ibeta()' function of GNUPlot. As a side +Since ‘gawk’ does not have a built-in function for the computation of +the beta function, we use the ‘ibeta()’ function of GNUPlot. As a side effect, we learn how to use GNUPlot as a sophisticated calculator. The -comparison of means is done as in 'tutest', paragraph 14.2, page 613, -and the comparison of variances is done as in 'ftest', page 611 in -'Numerical Recipes'. 
+comparison of means is done as in ‘tutest’, paragraph 14.2, page 613, +and the comparison of variances is done as in ‘ftest’, page 611 in +‘Numerical Recipes’. As usual, we take the site-independent code for servers and append -our own functions 'SetUpServer()' and 'HandleGET()': +our own functions ‘SetUpServer()’ and ‘HandleGET()’: function SetUpServer() { TopHeader = "<HTML><title>Statistics with GAWK</title>" @@ -2472,20 +2472,20 @@ our own functions 'SetUpServer()' and 'HandleGET()': } Here, you see the menu structure that the user sees. Later, we will -see how the program structure of the 'HandleGET()' function reflects the +see how the program structure of the ‘HandleGET()’ function reflects the menu structure. What is missing here is the link for the image we generate. In an event-driven environment, request, generation, and delivery of images are separated. - Notice the way we initialize the 'GnuPlot' command string for the + Notice the way we initialize the ‘GnuPlot’ command string for the pipe. By default, GNUPlot outputs the generated image via standard -output, as well as the results of 'print'(ed) calculations via standard +output, as well as the results of ‘print’(ed) calculations via standard error. The redirection causes standard error to be mixed into standard -output, enabling us to read results of calculations with 'getline'. By +output, enabling us to read results of calculations with ‘getline’. By initializing the statistical parameters with some meaningful defaults, we make sure the user gets an image the first time he uses the program. - Following is the rather long function 'HandleGET()', which implements + Following is the rather long function ‘HandleGET()’, which implements the contents of this service by reacting to the different kinds of requests from the browser. Before you start playing with this script, make sure that your browser supports JavaScript and that it also has @@ -2572,16 +2572,16 @@ menu choice. 
The third menu choice shows us that generation and presentation of an image are two separate actions. While the latter takes place quite instantly in the third menu choice, the former takes place in the much longer second choice. Image data passes from the -generating action to the presenting action via the variable 'Image' that -contains a complete '.png' image, which is otherwise stored in a file. -If you prefer '.ps' or '.gif' images over the default '.png' images, you +generating action to the presenting action via the variable ‘Image’ that +contains a complete ‘.png’ image, which is otherwise stored in a file. +If you prefer ‘.ps’ or ‘.gif’ images over the default ‘.png’ images, you may select these options by uncommenting the appropriate lines. But remember to do so in two places: when telling GNUPlot which kind of images to generate, and when transmitting the image at the end of the program. - Looking at the end of the program, the way we pass the 'Content-type' -to the browser is a bit unusual. It is appended to the 'OK' of the + Looking at the end of the program, the way we pass the ‘Content-type’ +to the browser is a bit unusual. It is appended to the ‘OK’ of the first header line to make sure the type information becomes part of the header. The other variables that get transmitted across the network are made empty, because in this case we do not have an HTML document to @@ -2589,22 +2589,22 @@ transmit, but rather raw image data to contain in the body. Most of the work is done in the second menu choice. It starts with a strange JavaScript code snippet. When first implementing this server, -we used a short '"<IMG SRC=" MyPrefix "/Image>"' here. But then +we used a short ‘"<IMG SRC=" MyPrefix "/Image>"’ here. But then browsers got smarter and tried to improve on speed by requesting the image and the HTML code at the same time. 
When doing this, the browser tries to build up a connection for the image request while the request for the HTML text is not yet completed. The browser tries to connect to -the 'gawk' server on port 8080 while port 8080 is still in use for +the ‘gawk’ server on port 8080 while port 8080 is still in use for transmission of the HTML text. The connection for the image cannot be -built up, so the image appears as "broken" in the browser window. We +built up, so the image appears as “broken” in the browser window. We solved this problem by telling the browser to open a separate window for the image, but only after a delay of 1000 milliseconds. By this time, the server should be ready for serving the next request. But there is one more subtlety in the JavaScript code. Each time the JavaScript code opens a window for the image, the name of the image is -appended with a timestamp ('systime()'). Why this constant change of -name for the image? Initially, we always named the image 'Image', but +appended with a timestamp (‘systime()’). Why this constant change of +name for the image? Initially, we always named the image ‘Image’, but then the Netscape browser noticed the name had _not_ changed since the previous request and displayed the previous image (caching behavior). The server core is implemented so that browsers are told _not_ to cache @@ -2626,24 +2626,24 @@ available. Therefore we insert the JavaScript code here to initiate the opening of the image in a separate window. Then, we prepare some variables that will be passed to GNUPlot for calculation of the probabilities. Prior to reading the results, we must temporarily change -'RS' because GNUPlot separates lines with newlines. After instructing -GNUPlot to generate a '.png' (or '.ps' or '.gif') image, we initiate the +‘RS’ because GNUPlot separates lines with newlines. 
After instructing +GNUPlot to generate a ‘.png’ (or ‘.ps’ or ‘.gif’) image, we initiate the insertion of some text, explaining the resulting probabilities. The -final 'plot' command actually generates the image data. This raw binary +final ‘plot’ command actually generates the image data. This raw binary has to be read in carefully without adding, changing, or deleting a -single byte. Hence the unusual initialization of 'Image' and completion -with a 'while' loop. +single byte. Hence the unusual initialization of ‘Image’ and completion +with a ‘while’ loop. When using this server, it soon becomes clear that it is far from being perfect. It mixes source code of six scripting languages or protocols: - * GNU 'awk' implements a server for the protocol: - * HTTP which transmits: - * HTML text which contains a short piece of: - * JavaScript code opening a separate window. - * A Bourne shell script is used for piping commands into: - * GNUPlot to generate the image to be opened. + • GNU ‘awk’ implements a server for the protocol: + • HTTP which transmits: + • HTML text which contains a short piece of: + • JavaScript code opening a separate window. + • A Bourne shell script is used for piping commands into: + • GNUPlot to generate the image to be opened. After all this work, the GNUPlot image opens in the JavaScript window where it can be viewed by the user. @@ -2656,11 +2656,11 @@ negative variances causes invalid results. ---------- Footnotes ---------- (1) Due to licensing problems, the default installation of GNUPlot -disables the generation of '.gif' files. If your installed version does -not accept 'set term gif', just download and install the most recent +disables the generation of ‘.gif’ files. If your installed version does +not accept ‘set term gif’, just download and install the most recent version of GNUPlot and the GD library (https://libgd.github.io/) by Thomas Boutell. 
Otherwise you still have the chance to generate some -ASCII-art style images with GNUPlot by using 'set term dumb'. (We tried +ASCII-art style images with GNUPlot by using ‘set term dumb’. (We tried it and it worked.) @@ -2670,19 +2670,19 @@ File: gawkinet.info, Node: MAZE, Next: MOBAGWHO, Prev: STATIST, Up: Some App =================================================== In the long run, every program becomes rococo, and then rubble. - -- _Alan Perlis_ + — _Alan Perlis_ - By now, we know how to present arbitrary 'Content-type's to a + By now, we know how to present arbitrary ‘Content-type’s to a browser. In this node, our server presents a 3D world to our browser. The 3D world is described in a scene description language (VRML, Virtual Reality Modeling Language) that allows us to travel through a perspective view of a 2D maze with our browser. Browsers with a VRML plugin enable exploration of this technology. We could do one of those -boring 'Hello world' examples here, that are usually presented when +boring ‘Hello world’ examples here, that are usually presented when introducing novices to VRML. If you have never written any VRML code, have a look at the VRML FAQ. Presenting a static VRML scene is a bit -trivial; in order to expose 'gawk''s capabilities, we will present a -dynamically generated VRML scene. The function 'SetUpServer()' is very +trivial; in order to expose ‘gawk’’s capabilities, we will present a +dynamically generated VRML scene. The function ‘SetUpServer()’ is very simple because it only sets the default HTML page and initializes the random number generator. As usual, the surrounding server lets you browse the maze. @@ -2699,14 +2699,14 @@ browse the maze. srand() } - The function 'HandleGET()' is a bit longer because it first computes + The function ‘HandleGET()’ is a bit longer because it first computes the maze and afterwards generates the VRML code that is sent across the network. 
As shown in the STATIST example (*note STATIST::), we set the type of the content to VRML and then store the VRML representation of the maze as the page content. We assume that the maze is stored in a 2D array. Initially, the maze consists of walls only. Then, we add an entry and an exit to the maze and let the rest of the work be done by -the function 'MakeMaze()'. Now, only the wall fields are left in the +the function ‘MakeMaze()’. Now, only the wall fields are left in the maze. By iterating over these fields, we generate one line of VRML code for each wall field. @@ -2756,8 +2756,8 @@ code for each wall field. } } - Finally, we have a look at 'MakeMaze()', the function that generates -the 'Maze' array. When entered, this function assumes that the array + Finally, we have a look at ‘MakeMaze()’, the function that generates +the ‘Maze’ array. When entered, this function assumes that the array has been initialized so that each element represents a wall element and the maze is initially full of wall elements. Only the entrance and the exit of the maze should have been left free. The parameters of the @@ -2774,7 +2774,7 @@ but it is done much simpler recursively. Notice that elements with coordinates that are both odd are assumed to be on our way through the maze and the generating process cannot -terminate as long as there is such an element not being 'delete'd. All +terminate as long as there is such an element not being ‘delete’d. All other elements are potentially part of the wall. function MakeMaze(x, y) { @@ -2805,52 +2805,52 @@ File: gawkinet.info, Node: MOBAGWHO, Next: STOXPRED, Prev: MAZE, Up: Some Ap make it so simple that there are obviously no deficiencies, and the other way is to make it so complicated that there are no obvious deficiencies. - -- _C.A.R. Hoare_ + — _C.A.R. 
Hoare_ - A "mobile agent" is a program that can be dispatched from a computer + A “mobile agent” is a program that can be dispatched from a computer and transported to a remote server for execution. This is called -"migration", which means that a process on another system is started +“migration”, which means that a process on another system is started that is independent from its originator. Ideally, it wanders through a network while working for its creator or owner. In places like the UMBC Agent Web, people are quite confident that (mobile) agents are a software engineering paradigm that enables us to significantly increase the efficiency of our work. Mobile agents could become the mediators between users and the networking world. For an unbiased view at this -technology, see the remarkable paper 'Mobile Agents: Are they a good -idea?'.(1) +technology, see the remarkable paper ‘Mobile Agents: Are they a good +idea?’.(1) When trying to migrate a process from one system to another, a server process is needed on the receiving side. Depending on the kind of server process, several ways of implementation come to mind. How the process is implemented depends upon the kind of server process: - * HTTP can be used as the protocol for delivery of the migrating + • HTTP can be used as the protocol for delivery of the migrating process. In this case, we use a common web server as the receiving server process. A universal CGI script mediates between migrating process and web server. Each server willing to accept migrating agents makes this universal service available. HTTP supplies the - 'POST' method to transfer some data to a file on the web server. - When a CGI script is called remotely with the 'POST' method instead - of the usual 'GET' method, data is transmitted from the client - process to the standard input of the server's CGI script. So, to + ‘POST’ method to transfer some data to a file on the web server. 
+ When a CGI script is called remotely with the ‘POST’ method instead + of the usual ‘GET’ method, data is transmitted from the client + process to the standard input of the server’s CGI script. So, to implement a mobile agent, we must not only write the agent program to start on the client side, but also the CGI script to receive the agent on the server side. - * The 'PUT' method can also be used for migration. HTTP does not - require a CGI script for migration via 'PUT'. However, with common + • The ‘PUT’ method can also be used for migration. HTTP does not + require a CGI script for migration via ‘PUT’. However, with common web servers there is no advantage to this solution, because web servers such as Apache require explicit activation of a special - 'PUT' script. + ‘PUT’ script. - * 'Agent Tcl' pursues a different course; it relies on a dedicated + • ‘Agent Tcl’ pursues a different course; it relies on a dedicated server process with a dedicated protocol specialized for receiving mobile agents. Our agent example abuses a common web server as a migration tool. So, it needs a universal CGI script on the receiving side (the web -server). The receiving script is activated with a 'POST' request when -placed into a location like '/httpd/cgi-bin/PostAgent.sh'. +server). The receiving script is activated with a ‘POST’ request when +placed into a location like ‘/httpd/cgi-bin/PostAgent.sh’. #!/bin/sh MobAg=/tmp/MobileAgent.$$ @@ -2862,9 +2862,9 @@ placed into a location like '/httpd/cgi-bin/PostAgent.sh'. gawk 'BEGIN { print "\r\nAgent started" }' rm $MobAg # delete script file of agent - By making its process id ('$$') part of the unique file name, the + By making its process id (‘$$’) part of the unique file name, the script avoids conflicts between concurrent instances of the script. -First, all lines from standard input (the mobile agent's source code) +First, all lines from standard input (the mobile agent’s source code) are copied into this unique file. 
Then, the agent is started as a concurrent process and a short message reporting this fact is sent to the submitting client. Finally, the script file of the mobile agent is @@ -2881,14 +2881,14 @@ Self-Reference Each migrating instance of an agent is started in a way that enables it to read its own source code from standard input and use the code for subsequent migrations. This is necessary because it - needs to treat the agent's code as data to transmit. 'gawk' is not + needs to treat the agent’s code as data to transmit. ‘gawk’ is not the ideal language for such a job. Lisp and Tcl are more suitable because they do not make a distinction between program code and data. Independence After migration, the agent is not linked to its former home in any - way. By reporting 'Agent started', it waves "Goodbye" to its + way. By reporting ‘Agent started’, it waves “Goodbye” to its origin. The originator may choose to terminate or not. The originating agent itself is started just like any other @@ -2897,10 +2897,10 @@ letting the name of the original host migrate with the agent, the agent that migrates to a host far away from its origin can report the result back home. Having arrived at the end of the journey, the agent establishes a connection and reports the results. This is the reason -for determining the name of the host with 'uname -n' and storing it in -'MyOrigin' for later use. We may also set variables with the '-v' +for determining the name of the host with ‘uname -n’ and storing it in +‘MyOrigin’ for later use. We may also set variables with the ‘-v’ option from the command line. 
This interactivity is only of importance -in the context of starting a mobile agent; therefore this 'BEGIN' +in the context of starting a mobile agent; therefore this ‘BEGIN’ pattern and its action do not take part in migration: BEGIN { @@ -2919,7 +2919,7 @@ pattern and its action do not take part in migration: } } - Since 'gawk' cannot manipulate and transmit parts of the program + Since ‘gawk’ cannot manipulate and transmit parts of the program directly, the source code is read and stored in strings. Therefore, the program scans itself for the beginning and the ending of functions. Each line in between is appended to the code string until the end of the @@ -2939,7 +2939,7 @@ that the order of the functions is preserved during migration: The web server code in *note A Web Service with Interaction: Interacting Service, was first developed as a site-independent core. -Likewise, the 'gawk'-based mobile agent starts with an agent-independent +Likewise, the ‘gawk’-based mobile agent starts with an agent-independent core, to which can be appended application-dependent functions. What follows is the only application-independent function needed for the mobile agent: @@ -2963,54 +2963,54 @@ mobile agent: close(HttpService) } - The 'migrate()' function prepares the aforementioned strings + The ‘migrate()’ function prepares the aforementioned strings containing the program code and transmits them to a server. A -consequence of this modular approach is that the 'migrate()' function -takes some parameters that aren't needed in this application, but that -will be in future ones. Its mandatory parameter 'Destination' holds the +consequence of this modular approach is that the ‘migrate()’ function +takes some parameters that aren’t needed in this application, but that +will be in future ones. Its mandatory parameter ‘Destination’ holds the name (or IP address) of the server that the agent wants as a host for -its code. 
The optional parameter 'MobCode' may contain some 'gawk' code +its code. The optional parameter ‘MobCode’ may contain some ‘gawk’ code that is inserted during migration in front of all other code. The -optional parameter 'Label' may contain a string that tells the agent +optional parameter ‘Label’ may contain a string that tells the agent what to do in program execution after arrival at its new home site. One of the serious obstacles in implementing a framework for mobile agents is that it does not suffice to migrate the code. It is also necessary -to migrate the state of execution of the agent. In contrast to 'Agent -Tcl', this program does not try to migrate the complete set of +to migrate the state of execution of the agent. In contrast to ‘Agent +Tcl’, this program does not try to migrate the complete set of variables. The following conventions apply: - * Each variable in an agent program is local to the current host and + • Each variable in an agent program is local to the current host and does _not_ migrate. - * The array 'MOBFUN' shown above is an exception. It is handled by - the function 'migrate()' and does migrate with the application. + • The array ‘MOBFUN’ shown above is an exception. It is handled by + the function ‘migrate()’ and does migrate with the application. - * The other exception is the array 'MOBVAR'. Each variable that + • The other exception is the array ‘MOBVAR’. Each variable that takes part in migration has to be an element of this array. - 'migrate()' also takes care of this. + ‘migrate()’ also takes care of this. - Now it's clear what happens to the 'Label' parameter of the function -'migrate()'. It is copied into 'MOBVAR["Label"]' and travels alongside + Now it’s clear what happens to the ‘Label’ parameter of the function +‘migrate()’. It is copied into ‘MOBVAR["Label"]’ and travels alongside the other data. Since traveling takes place via HTTP, records must be -separated with '"\r\n"' in 'RS' and 'ORS' as usual. 
The code assembly +separated with ‘"\r\n"’ in ‘RS’ and ‘ORS’ as usual. The code assembly for migration takes place in three steps: - * Iterate over 'MOBFUN' to collect all functions verbatim. + • Iterate over ‘MOBFUN’ to collect all functions verbatim. - * Prepare a 'BEGIN' pattern and put assignments to mobile variables + • Prepare a ‘BEGIN’ pattern and put assignments to mobile variables into the action part. - * Transmission itself resembles GETURL: the header with the request - and the 'Content-length' is followed by the body. In case there is + • Transmission itself resembles GETURL: the header with the request + and the ‘Content-length’ is followed by the body. In case there is any reply over the network, it is read completely and echoed to standard output to avoid irritating the server. The application-independent framework is now almost complete. What -follows is the 'END' pattern which executes when the mobile agent has +follows is the ‘END’ pattern which executes when the mobile agent has finished reading its own code. First, it checks whether it is already running on a remote host or not. In case initialization has not yet -taken place, it starts 'MyInit()'. Otherwise (later, on a remote host), -it starts 'MyJob()': +taken place, it starts ‘MyInit()’. Otherwise (later, on a remote host), +it starts ‘MyJob()’: END { if (ARGC != 2) exit # stop when called with wrong parameters @@ -3020,9 +3020,9 @@ it starts 'MyJob()': MyJob() # so we do our job } - All that's left to extend the framework into a complete application -is to write two application-specific functions: 'MyInit()' and -'MyJob()'. Keep in mind that the former is executed once on the + All that’s left to extend the framework into a complete application +is to write two application-specific functions: ‘MyInit()’ and +‘MyJob()’. 
Keep in mind that the former is executed once on the originating host, while the latter is executed after each migration: function MyInit() { @@ -3036,11 +3036,11 @@ originating host, while the latter is executed after each migration: } As mentioned earlier, this agent takes the name of its origin -('MyOrigin') with it. Then, it takes the name of its first destination +(‘MyOrigin’) with it. Then, it takes the name of its first destination and goes there for further work. Notice that this name has the port number of the web server appended to the name of the server, because the -function 'migrate()' needs it this way to create the 'HttpService' -variable. Finally, it waits for the result to arrive. The 'MyJob()' +function ‘migrate()’ needs it this way to create the ‘HttpService’ +variable. Finally, it waits for the result to arrive. The ‘MyJob()’ function runs on the remote host: function MyJob() { @@ -3060,20 +3060,20 @@ function runs on the remote host: } } - After migrating, the first thing to do in 'MyJob()' is to delete the + After migrating, the first thing to do in ‘MyJob()’ is to delete the name of the current host from the list of hosts to visit. Now, it is -time to start the real work by appending the host's name to the result +time to start the real work by appending the host’s name to the result string, and reading line by line who is logged in on this host. A very -annoying circumstance is the fact that the elements of 'MOBVAR' cannot -hold the newline character ('"\n"'). If they did, migration of this -string would not work because the string wouldn't obey the syntax rule -for a string in 'gawk'. 'SUBSEP' is used as a temporary replacement. +annoying circumstance is the fact that the elements of ‘MOBVAR’ cannot +hold the newline character (‘"\n"’). If they did, migration of this +string would not work because the string wouldn’t obey the syntax rule +for a string in ‘gawk’. ‘SUBSEP’ is used as a temporary replacement. 
If the list of hosts to visit holds at least one more entry, the agent migrates to that place to go on working there. Otherwise, we -replace the 'SUBSEP's with a newline character in the resulting string, +replace the ‘SUBSEP’s with a newline character in the resulting string, and report it to the originating host, whose name is stored in -'MOBVAR["MyOrigin"]'. +‘MOBVAR["MyOrigin"]’. ---------- Footnotes ---------- @@ -3094,21 +3094,21 @@ File: gawkinet.info, Node: STOXPRED, Next: PROTBASE, Prev: MOBAGWHO, Up: Som ape-descendent life forms are so amazingly primitive that they still think digital watches are a pretty neat idea. - This planet has -- or rather had -- a problem, which was this: most + This planet has — or rather had — a problem, which was this: most of the people living on it were unhappy for pretty much of the time. Many solutions were suggested for this problem, but most of these were largely concerned with the movements of small green - pieces of paper, which is odd because it wasn't the small green + pieces of paper, which is odd because it wasn’t the small green pieces of paper that were unhappy. - -- _Douglas Adams, 'The Hitch Hiker's Guide to the Galaxy'_ + — _Douglas Adams, ‘The Hitch Hiker’s Guide to the Galaxy’_ Valuable services on the Internet are usually _not_ implemented as mobile agents. There are much simpler ways of implementing services. -All Unix systems provide, for example, the 'cron' service. Unix system +All Unix systems provide, for example, the ‘cron’ service. Unix system users can write a list of tasks to be done each day, each week, twice a -day, or just once. The list is entered into a file named 'crontab'. +day, or just once. The list is entered into a file named ‘crontab’. 
For example, to distribute a newsletter on a daily basis this way, use -'cron' for calling a script each day early in the morning: +‘cron’ for calling a script each day early in the morning: # run at 8 am on weekdays, distribute the newsletter 0 8 * * 1-5 $HOME/bin/daily.job >> $HOME/log/newsletter 2>&1 @@ -3159,7 +3159,7 @@ anybody. ... The script as a whole is rather long. In order to ease the pain of -studying other people's source code, we have broken the script up into +studying other people’s source code, we have broken the script up into meaningful parts which are invoked one after the other. The basic structure of the script is as follows: @@ -3173,16 +3173,16 @@ structure of the script is as follows: } The earlier parts store data into variables and arrays which are -subsequently used by later parts of the script. The 'Init()' function +subsequently used by later parts of the script. The ‘Init()’ function first checks if the script is invoked correctly (without any parameters). If not, it informs the user of the correct usage. What follows are preparations for the retrieval of the historical quote data. -The names of the 30 stock shares are stored in an array 'name' along -with the current date in 'day', 'month', and 'year'. +The names of the 30 stock shares are stored in an array ‘name’ along +with the current date in ‘day’, ‘month’, and ‘year’. All users who are separated from the Internet by a firewall and have to direct their Internet accesses to a proxy must supply the name of the -proxy to this script with the '-v Proxy=NAME' option. For most users, +proxy to this script with the ‘-v Proxy=NAME’ option. For most users, the default proxy and port number should suffice. function Init() { @@ -3221,22 +3221,22 @@ values): Lines contain values of the same time instant, whereas columns are separated by commas and contain the kind of data that is described in -the header (first) line. 
At first, 'gawk' is instructed to separate -columns by commas ('FS = ","'). In the loop that follows, a connection +the header (first) line. At first, ‘gawk’ is instructed to separate +columns by commas (‘FS = ","’). In the loop that follows, a connection to the Yahoo server is first opened, then a download takes place, and finally the connection is closed. All this happens once for each ticker symbol. In the body of this loop, an Internet address is built up as a string according to the rules of the Yahoo server. The starting and ending date are chosen to be exactly the same, but one year apart in the -past. All the action is initiated within the 'printf' command which +past. All the action is initiated within the ‘printf’ command which transmits the request for data to the Yahoo server. - In the inner loop, the server's data is first read and then scanned + In the inner loop, the server’s data is first read and then scanned line by line. Only lines which have six columns and the name of a month in the first column contain relevant data. This data is stored in the -two-dimensional array 'quote'; one dimension being time, the other being -the ticker symbol. During retrieval of the first stock's data, the -calendar names of the time instances are stored in the array 'day' +two-dimensional array ‘quote’; one dimension being time, the other being +the ticker symbol. During retrieval of the first stock’s data, the +calendar names of the time instances are stored in the array ‘day’ because we need them later. function ReadQuotes() { @@ -3289,8 +3289,8 @@ algorithm: _If a stock fell yesterday, assume it will also fall today; if it rose yesterday, assume it will rise today_. (Feel free to replace this algorithm with a smarter one.) If a stock changed in the same direction on two consecutive days, this is an indication which should be -highlighted. Two-day advances are stored in 'hot' and two-day declines -in 'avoid'. +highlighted. 
Two-day advances are stored in ‘hot’ and two-day declines +in ‘avoid’. The rest of the function is a sanity check. It counts the number of correct predictions in relation to the total number of predictions one @@ -3329,9 +3329,9 @@ could have made in the year before. } } - At this point the hard work has been done: the array 'predict' + At this point the hard work has been done: the array ‘predict’ contains the predictions for all the ticker symbols. It is up to the -function 'Report()' to find some nice words to present the desired +function ‘Report()’ to find some nice words to present the desired information. function Report() { @@ -3368,8 +3368,8 @@ information. report = report "you should visit a doctor who can treat your ailment." } - The function 'SendMail()' goes through the list of customers and -opens a pipe to the 'mail' command for each of them. Each one receives + The function ‘SendMail()’ goes through the list of customers and +opens a pipe to the ‘mail’ command for each of them. Each one receives an email message with a proper subject heading and is addressed with his full name. @@ -3396,7 +3396,7 @@ to complete, depending upon network traffic and the speed of the available Internet link. The quality of the prediction algorithm is likely to be disappointing. Try to find a better one. Should you find one with a success rate of more than 50%, please tell us about it! It -is only for the sake of curiosity, of course. ':-)' +is only for the sake of curiosity, of course. ‘:-)’ File: gawkinet.info, Node: PROTBASE, Prev: STOXPRED, Up: Some Applications and Techniques @@ -3406,15 +3406,15 @@ File: gawkinet.info, Node: PROTBASE, Prev: STOXPRED, Up: Some Applications an Inside every large problem is a small problem struggling to get out.(1) - -- _With apologies to C.A.R. Hoare_ + — _With apologies to C.A.R. 
Hoare_ - Yahoo's database of stock market data is just one among the many + Yahoo’s database of stock market data is just one among the many large databases on the Internet. Another one is located at NCBI (National Center for Biotechnology Information). Established in 1988 as a national resource for molecular biology information, NCBI creates public databases, conducts research in computational biology, develops software tools for analyzing genome data, and disseminates biomedical -information. In this section, we look at one of NCBI's public services, +information. In this section, we look at one of NCBI’s public services, which is called BLAST (Basic Local Alignment Search Tool). You probably know that the information necessary for reproducing @@ -3436,35 +3436,35 @@ chose to offer their database service through popular Internet protocols. There are four basic ways to use the so-called BLAST services: - * The easiest way to use BLAST is through the web. Users may simply + • The easiest way to use BLAST is through the web. Users may simply point their browsers at the NCBI home page and link to the BLAST pages. NCBI provides a stable URL that may be used to perform BLAST searches without interactive use of a web browser. This is what we will do later in this section. A demonstration client and - a 'README' file demonstrate how to access this URL. + a ‘README’ file demonstrate how to access this URL. - * Currently, 'blastcl3' is the standard network BLAST client. You - can download 'blastcl3' from the anonymous FTP location. + • Currently, ‘blastcl3’ is the standard network BLAST client. You + can download ‘blastcl3’ from the anonymous FTP location. - * BLAST 2.0 can be run locally as a full executable and can be used + • BLAST 2.0 can be run locally as a full executable and can be used to run BLAST searches against private local databases, or downloaded copies of the NCBI databases. BLAST 2.0 executables may be found on the NCBI anonymous FTP server. 
- * The NCBI BLAST Email server is the best option for people without + • The NCBI BLAST Email server is the best option for people without convenient access to the web. A similarity search can be performed by sending a properly formatted mail message containing the nucleotide or protein query sequence to <blast@ncbi.nlm.nih.gov>. The query sequence is compared against the specified database using the BLAST algorithm and the results are returned in an email message. For more information on formulating email BLAST searches, - you can send a message consisting of the word "HELP" to the same + you can send a message consisting of the word “HELP” to the same address, <blast@ncbi.nlm.nih.gov>. Our starting point is the demonstration client mentioned in the first -option. The 'README' file that comes along with the client explains the +option. The ‘README’ file that comes along with the client explains the whole process in a nutshell. In the rest of this section, we first show -what such requests look like. Then we show how to use 'gawk' to +what such requests look like. Then we show how to use ‘gawk’ to implement a client in about 10 lines of code. Finally, we show how to interpret the result returned from the service. @@ -3472,10 +3472,10 @@ interpret the result returned from the service. amino acid and nucleic acid codes, with these exceptions: lower-case letters are accepted and are mapped into upper-case; a single hyphen or dash can be used to represent a gap of indeterminate length; and in -amino acid sequences, 'U' and '*' are acceptable letters (see below). +amino acid sequences, ‘U’ and ‘*’ are acceptable letters (see below). Before submitting a request, any numerical digits in the query sequence should either be removed or replaced by appropriate letter codes (e.g., -'N' for unknown nucleic acid residue or 'X' for unknown amino acid +‘N’ for unknown nucleic acid residue or ‘X’ for unknown amino acid residue). 
The nucleic acid codes supported are: A --> adenosine M --> A C (amino) @@ -3491,7 +3491,7 @@ residue). The nucleic acid codes supported are: Now you know the alphabet of nucleotide sequences. The last two lines of the following example query show such a sequence, which is obviously made up only of elements of the alphabet just described. -Store this example query into a file named 'protbase.request'. You are +Store this example query into a file named ‘protbase.request’. You are now ready to send it to the server with the demonstration client. PROGRAM blastn @@ -3503,38 +3503,38 @@ now ready to send it to the server with the demonstration client. caccaccatggacagcaaa The actual search request begins with the mandatory parameter -'PROGRAM' in the first column followed by the value 'blastn' (the name +‘PROGRAM’ in the first column followed by the value ‘blastn’ (the name of the program) for searching nucleic acids. The next line contains the -mandatory search parameter 'DATALIB' with the value 'month' for the +mandatory search parameter ‘DATALIB’ with the value ‘month’ for the newest nucleic acid sequences. The third line contains an optional -'EXPECT' parameter and the value desired for it. The fourth line -contains the mandatory 'BEGIN' directive, followed by the query sequence +‘EXPECT’ parameter and the value desired for it. The fourth line +contains the mandatory ‘BEGIN’ directive, followed by the query sequence in FASTA/Pearson format. Each line of information must be less than 80 characters in length. - The "month" database contains all new or revised sequences released + The “month” database contains all new or revised sequences released in the last 30 days and is useful for searching against new sequences. -There are five different blast programs, 'blastn' being the one that +There are five different blast programs, ‘blastn’ being the one that compares a nucleotide query sequence against a nucleotide sequence database. 
The last server directive that must appear in every request is the -'BEGIN' directive. The query sequence should immediately follow the -'BEGIN' directive and must appear in FASTA/Pearson format. A sequence +‘BEGIN’ directive. The query sequence should immediately follow the +‘BEGIN’ directive and must appear in FASTA/Pearson format. A sequence in FASTA/Pearson format begins with a single-line description. The description line, which is required, is distinguished from the lines of -sequence data that follow it by having a greater-than ('>') symbol in +sequence data that follow it by having a greater-than (‘>’) symbol in the first column. For the purposes of the BLAST server, the text of the description is arbitrary. - If you prefer to use a client written in 'gawk', just store the -following 10 lines of code into a file named 'protbase.awk' and use this -client instead. Invoke it with 'gawk -f protbase.awk protbase.request'. + If you prefer to use a client written in ‘gawk’, just store the +following 10 lines of code into a file named ‘protbase.awk’ and use this +client instead. Invoke it with ‘gawk -f protbase.awk protbase.request’. Then wait a minute and watch the result coming in. In order to -replicate the demonstration client's behavior as closely as possible, +replicate the demonstration client’s behavior as closely as possible, this client does not use a proxy server. We could also have extended the client program in *note Retrieving Web Pages: GETURL, to implement -the client request from 'protbase.awk' as a special case. +the client request from ‘protbase.awk’ as a special case. { request = request "\n" $0 } @@ -3560,7 +3560,7 @@ prints the complete result coming from the server. you can ignore. Then there are some comments about the query having been filtered to avoid spuriously high scores. After this, there is a reference to the paper that describes the software being used for -searching the data base. 
After a repetition of the original query's +searching the data base. After a repetition of the original query’s description we find the list of significant alignments: Sequences producing significant alignments: (bits) Value @@ -3597,9 +3597,9 @@ for the databases from which the sequences were derived. Patents pat|country|number GenInfo Backbone Id bbs|number - For example, an identifier might be 'gb|AC021182.14|AC021182', where -the 'gb' tag indicates that the identifier refers to a GenBank sequence, -'AC021182.14' is its GenBank ACCESSION, and 'AC021182' is the GenBank + For example, an identifier might be ‘gb|AC021182.14|AC021182’, where +the ‘gb’ tag indicates that the identifier refers to a GenBank sequence, +‘AC021182.14’ is its GenBank ACCESSION, and ‘AC021182’ is the GenBank LOCUS. The identifier contains no spaces, so that a space indicates the end of the identifier. @@ -3627,29 +3627,29 @@ fragment on chromosome 7. If you are still reading at this point, you are probably interested in finding out more about Computational Biology and you might appreciate the following hints. - 1. There is a book called 'Introduction to Computational Biology' by + 1. There is a book called ‘Introduction to Computational Biology’ by Michael S. Waterman, which is worth reading if you are seriously interested. You can find a good book review on the Internet. - 2. While Waterman's book explains the algorithms employed internally + 2. While Waterman’s book explains the algorithms employed internally in the database search engines, most practitioners prefer to approach the subject differently. The applied side of Computational Biology is called Bioinformatics, and emphasizes the tools available for day-to-day work as well as how to actually _use_ them. One of the very few affordable books on Bioinformatics - is 'Developing Bioinformatics Computer Skills'. + is ‘Developing Bioinformatics Computer Skills’. 3. 
The sequences _gawk_ and _gnuawk_ are in widespread use in the genetic material of virtually every earthly living being. Let us take this as a clear indication that the divine creator has - intended 'gawk' to prevail over other scripting languages such as - 'perl', 'tcl', or 'python' which are not even proper sequences. + intended ‘gawk’ to prevail over other scripting languages such as + ‘perl’, ‘tcl’, or ‘python’ which are not even proper sequences. (:-) ---------- Footnotes ---------- - (1) What C.A.R. Hoare actually said was "Inside every large program -is a small program struggling to get out." + (1) What C.A.R. Hoare actually said was “Inside every large program +is a small program struggling to get out.” File: gawkinet.info, Node: Links, Next: GNU Free Documentation License, Prev: Some Applications and Techniques, Up: Top @@ -3660,19 +3660,19 @@ File: gawkinet.info, Node: Links, Next: GNU Free Documentation License, Prev: This section lists the URLs for various items discussed in this Info file. They are presented in the order in which they appear. 
-'Internet Programming with Python' +‘Internet Programming with Python’ <http://cewing.github.io/training.python_web/html/index.html> -'Advanced Perl Programming' +‘Advanced Perl Programming’ <http://www.oreilly.com/catalog/advperl> -'Web Client Programming with Perl' +‘Web Client Programming with Perl’ <http://www.oreilly.com/catalog/webclient> -Richard Stevens's home page and book +Richard Stevens’s home page and book <http://www.kohala.com/start> -Volume III of 'Internetworking with TCP/IP', by Comer and Stevens +Volume III of ‘Internetworking with TCP/IP’, by Comer and Stevens <http://www.cs.purdue.edu/homes/dec/tcpip3s.cont.html> XBM Graphics File Format @@ -3681,7 +3681,7 @@ XBM Graphics File Format GNUPlot <http://www.gnuplot.info> -Mark Humphrys' Eliza page +Mark Humphrys’ Eliza page <https://computing.dcu.ie/~humphrys/eliza.html> Eliza on Wikipedia @@ -3715,7 +3715,7 @@ MiniSQL Market Share Surveys <http://www.netcraft.com/survey> -'Numerical Recipes in C: The Art of Scientific Computing' +‘Numerical Recipes in C: The Art of Scientific Computing’ <http://numerical.recipes/> VRML @@ -3754,10 +3754,10 @@ FASTA/Pearson Format Fasta/Pearson Sequence in Java <http://www.kazusa.or.jp/java/codon_table_java/> -Book Review of 'Introduction to Computational Biology' +Book Review of ‘Introduction to Computational Biology’ <https://dl.acm.org/doi/abs/10.1145/332925.332927> -'Developing Bioinformatics Computer Skills' +‘Developing Bioinformatics Computer Skills’ <http://www.oreilly.com/catalog/bioskills/> @@ -3768,7 +3768,7 @@ GNU Free Documentation License Version 1.3, 3 November 2008 - Copyright (C) 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. + Copyright © 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. <http://fsf.org/> Everyone is permitted to copy and distribute verbatim copies @@ -3777,14 +3777,14 @@ GNU Free Documentation License 0. 
PREAMBLE The purpose of this License is to make a manual, textbook, or other - functional and useful document "free" in the sense of freedom: to + functional and useful document “free” in the sense of freedom: to assure everyone the effective freedom to copy and redistribute it, with or without modifying it, either commercially or noncommercially. Secondarily, this License preserves for the author and publisher a way to get credit for their work, while not being considered responsible for modifications made by others. - This License is a kind of "copyleft", which means that derivative + This License is a kind of “copyleft”, which means that derivative works of the document must themselves be free in the same sense. It complements the GNU General Public License, which is a copyleft license designed for free software. @@ -3805,18 +3805,18 @@ GNU Free Documentation License be distributed under the terms of this License. Such a notice grants a world-wide, royalty-free license, unlimited in duration, to use that work under the conditions stated herein. The - "Document", below, refers to any such manual or work. Any member - of the public is a licensee, and is addressed as "you". You accept + “Document”, below, refers to any such manual or work. Any member + of the public is a licensee, and is addressed as “you”. You accept the license if you copy, modify or distribute the work in a way requiring permission under copyright law. - A "Modified Version" of the Document means any work containing the + A “Modified Version” of the Document means any work containing the Document or a portion of it, either copied verbatim, or with modifications and/or translated into another language. 
- A "Secondary Section" is a named appendix or a front-matter section + A “Secondary Section” is a named appendix or a front-matter section of the Document that deals exclusively with the relationship of the - publishers or authors of the Document to the Document's overall + publishers or authors of the Document to the Document’s overall subject (or to related matters) and contains nothing that could fall directly within that overall subject. (Thus, if the Document is in part a textbook of mathematics, a Secondary Section may not @@ -3825,7 +3825,7 @@ GNU Free Documentation License of legal, commercial, philosophical, ethical or political position regarding them. - The "Invariant Sections" are certain Secondary Sections whose + The “Invariant Sections” are certain Secondary Sections whose titles are designated, as being those of Invariant Sections, in the notice that says that the Document is released under this License. If a section does not fit the above definition of Secondary then it @@ -3833,13 +3833,13 @@ GNU Free Documentation License contain zero Invariant Sections. If the Document does not identify any Invariant Sections then there are none. - The "Cover Texts" are certain short passages of text that are + The “Cover Texts” are certain short passages of text that are listed, as Front-Cover Texts or Back-Cover Texts, in the notice that says that the Document is released under this License. A Front-Cover Text may be at most 5 words, and a Back-Cover Text may be at most 25 words. - A "Transparent" copy of the Document means a machine-readable copy, + A “Transparent” copy of the Document means a machine-readable copy, represented in a format whose specification is available to the general public, that is suitable for revising the document straightforwardly with generic text editors or (for images composed @@ -3851,7 +3851,7 @@ GNU Free Documentation License been arranged to thwart or discourage subsequent modification by readers is not Transparent. 
An image format is not Transparent if used for any substantial amount of text. A copy that is not - "Transparent" is called "Opaque". + “Transparent” is called “Opaque”. Examples of suitable formats for Transparent copies include plain ASCII without markup, Texinfo input format, LaTeX input format, @@ -3864,23 +3864,23 @@ GNU Free Documentation License the machine-generated HTML, PostScript or PDF produced by some word processors for output purposes only. - The "Title Page" means, for a printed book, the title page itself, + The “Title Page” means, for a printed book, the title page itself, plus such following pages as are needed to hold, legibly, the material this License requires to appear in the title page. For - works in formats which do not have any title page as such, "Title - Page" means the text near the most prominent appearance of the - work's title, preceding the beginning of the body of the text. + works in formats which do not have any title page as such, “Title + Page” means the text near the most prominent appearance of the + work’s title, preceding the beginning of the body of the text. - The "publisher" means any person or entity that distributes copies + The “publisher” means any person or entity that distributes copies of the Document to the public. - A section "Entitled XYZ" means a named subunit of the Document + A section “Entitled XYZ” means a named subunit of the Document whose title either is precisely XYZ or contains XYZ in parentheses following text that translates XYZ in another language. (Here XYZ stands for a specific section name mentioned below, such as - "Acknowledgements", "Dedications", "Endorsements", or "History".) - To "Preserve the Title" of such a section when you modify the - Document means that it remains a section "Entitled XYZ" according + “Acknowledgements”, “Dedications”, “Endorsements”, or “History”.) 
+ To “Preserve the Title” of such a section when you modify the + Document means that it remains a section “Entitled XYZ” according to this definition. The Document may include Warranty Disclaimers next to the notice @@ -3910,7 +3910,7 @@ GNU Free Documentation License If you publish printed copies (or copies in media that commonly have printed covers) of the Document, numbering more than 100, and - the Document's license notice requires Cover Texts, you must + the Document’s license notice requires Cover Texts, you must enclose the copies in covers that carry, clearly and legibly, all these Cover Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on the back cover. Both covers must also clearly @@ -3982,15 +3982,15 @@ GNU Free Documentation License the Addendum below. G. Preserve in that license notice the full lists of Invariant - Sections and required Cover Texts given in the Document's + Sections and required Cover Texts given in the Document’s license notice. H. Include an unaltered copy of this License. - I. Preserve the section Entitled "History", Preserve its Title, + I. Preserve the section Entitled “History”, Preserve its Title, and add to it an item stating at least the title, year, new authors, and publisher of the Modified Version as given on the - Title Page. If there is no section Entitled "History" in the + Title Page. If there is no section Entitled “History” in the Document, create one stating the title, year, authors, and publisher of the Document as given on its Title Page, then add an item describing the Modified Version as stated in the @@ -4000,12 +4000,12 @@ GNU Free Documentation License for public access to a Transparent copy of the Document, and likewise the network locations given in the Document for previous versions it was based on. These may be placed in the - "History" section. You may omit a network location for a work + “History” section. 
You may omit a network location for a work that was published at least four years before the Document itself, or if the original publisher of the version it refers to gives permission. - K. For any section Entitled "Acknowledgements" or "Dedications", + K. For any section Entitled “Acknowledgements” or “Dedications”, Preserve the Title of the section, and preserve in the section all the substance and tone of each of the contributor acknowledgements and/or dedications given therein. @@ -4014,11 +4014,11 @@ GNU Free Documentation License in their text and in their titles. Section numbers or the equivalent are not considered part of the section titles. - M. Delete any section Entitled "Endorsements". Such a section + M. Delete any section Entitled “Endorsements”. Such a section may not be included in the Modified Version. N. Do not retitle any existing section to be Entitled - "Endorsements" or to conflict in title with any Invariant + “Endorsements” or to conflict in title with any Invariant Section. O. Preserve any Warranty Disclaimers. @@ -4027,15 +4027,15 @@ GNU Free Documentation License appendices that qualify as Secondary Sections and contain no material copied from the Document, you may at your option designate some or all of these sections as invariant. To do this, add their - titles to the list of Invariant Sections in the Modified Version's + titles to the list of Invariant Sections in the Modified Version’s license notice. These titles must be distinct from any other section titles. - You may add a section Entitled "Endorsements", provided it contains + You may add a section Entitled “Endorsements”, provided it contains nothing but endorsements of your Modified Version by various - parties--for example, statements of peer review or that the text - has been approved by an organization as the authoritative - definition of a standard. 
+ parties—for example, statements of peer review or that the text has + been approved by an organization as the authoritative definition of + a standard. You may add a passage of up to five words as a Front-Cover Text, and a passage of up to 25 words as a Back-Cover Text, to the end of @@ -4073,10 +4073,10 @@ GNU Free Documentation License combined work. In the combination, you must combine any sections Entitled - "History" in the various original documents, forming one section - Entitled "History"; likewise combine any sections Entitled - "Acknowledgements", and any sections Entitled "Dedications". You - must delete all sections Entitled "Endorsements." + “History” in the various original documents, forming one section + Entitled “History”; likewise combine any sections Entitled + “Acknowledgements”, and any sections Entitled “Dedications”. You + must delete all sections Entitled “Endorsements.” 6. COLLECTIONS OF DOCUMENTS @@ -4097,16 +4097,16 @@ GNU Free Documentation License A compilation of the Document or its derivatives with other separate and independent documents or works, in or on a volume of a - storage or distribution medium, is called an "aggregate" if the + storage or distribution medium, is called an “aggregate” if the copyright resulting from the compilation is not used to limit the - legal rights of the compilation's users beyond what the individual + legal rights of the compilation’s users beyond what the individual works permit. When the Document is included in an aggregate, this License does not apply to the other works in the aggregate which are not themselves derivative works of the Document. 
If the Cover Text requirement of section 3 is applicable to these copies of the Document, then if the Document is less than one half - of the entire aggregate, the Document's Cover Texts may be placed + of the entire aggregate, the Document’s Cover Texts may be placed on covers that bracket the Document within the aggregate, or the electronic equivalent of covers if the Document is in electronic form. Otherwise they must appear on printed covers that bracket @@ -4128,8 +4128,8 @@ GNU Free Documentation License this License or a notice or disclaimer, the original version will prevail. - If a section in the Document is Entitled "Acknowledgements", - "Dedications", or "History", the requirement (section 4) to + If a section in the Document is Entitled “Acknowledgements”, + “Dedications”, or “History”, the requirement (section 4) to Preserve its Title (section 1) will typically require changing the actual title. @@ -4170,7 +4170,7 @@ GNU Free Documentation License Each version of the License is given a distinguishing version number. If the Document specifies that a particular numbered - version of this License "or any later version" applies to it, you + version of this License “or any later version” applies to it, you have the option of following the terms and conditions either of that specified version or of any later version that has been published (not as a draft) by the Free Software Foundation. If the @@ -4178,29 +4178,29 @@ GNU Free Documentation License choose any version ever published (not as a draft) by the Free Software Foundation. If the Document specifies that a proxy can decide which future versions of this License can be used, that - proxy's public statement of acceptance of a version permanently + proxy’s public statement of acceptance of a version permanently authorizes you to choose that version for the Document. 11. 
RELICENSING - "Massive Multiauthor Collaboration Site" (or "MMC Site") means any + “Massive Multiauthor Collaboration Site” (or “MMC Site”) means any World Wide Web server that publishes copyrightable works and also provides prominent facilities for anybody to edit those works. A public wiki that anybody can edit is an example of such a server. - A "Massive Multiauthor Collaboration" (or "MMC") contained in the + A “Massive Multiauthor Collaboration” (or “MMC”) contained in the site means any set of copyrightable works thus published on the MMC site. - "CC-BY-SA" means the Creative Commons Attribution-Share Alike 3.0 + “CC-BY-SA” means the Creative Commons Attribution-Share Alike 3.0 license published by Creative Commons Corporation, a not-for-profit corporation with a principal place of business in San Francisco, California, as well as future copyleft versions of that license published by that same organization. - "Incorporate" means to publish or republish a Document, in whole or + “Incorporate” means to publish or republish a Document, in whole or in part, as part of another Document. - An MMC is "eligible for relicensing" if it is licensed under this + An MMC is “eligible for relicensing” if it is licensed under this License, and if all works that were first published under this License somewhere other than this MMC, and subsequently incorporated in whole or in part into the MMC, (1) had no cover @@ -4227,7 +4227,7 @@ notices just after the title page: Free Documentation License''. If you have Invariant Sections, Front-Cover Texts and Back-Cover -Texts, replace the "with...Texts." 
line with this: +Texts, replace the “with...Texts.” line with this: with the Invariant Sections being LIST THEIR TITLES, with the Front-Cover Texts being LIST, and with the Back-Cover Texts @@ -4433,63 +4433,63 @@ Index Tag Table: -Node: Top1108 -Node: Preface4769 -Node: Introduction6144 -Node: Stream Communications7170 -Node: Datagram Communications8344 -Node: The TCP/IP Protocols9974 -Ref: The TCP/IP Protocols-Footnote-110658 -Node: Basic Protocols10815 -Ref: Basic Protocols-Footnote-112860 -Node: Ports12889 -Node: Making Connections14296 -Ref: Making Connections-Footnote-116854 -Ref: Making Connections-Footnote-216901 -Node: Using Networking17082 -Node: Gawk Special Files19405 -Node: Special File Fields21214 -Ref: table-inet-components25114 -Node: Comparing Protocols26426 -Node: File /inet/tcp26960 -Node: File /inet/udp27946 -Ref: File /inet/udp-Footnote-129658 -Node: TCP Connecting29912 -Node: Troubleshooting33347 -Ref: Troubleshooting-Footnote-136111 -Node: Interacting37068 -Node: Setting Up41426 -Node: Email45975 -Ref: Email-Footnote-148397 -Node: Web page49205 -Ref: Web page-Footnote-152025 -Ref: Web page-Footnote-252223 -Node: Primitive Service52717 -Node: Interacting Service55451 -Ref: Interacting Service-Footnote-164606 -Node: CGI Lib64638 -Node: Simple Server71638 -Ref: Simple Server-Footnote-179440 -Node: Caveats79541 -Node: Challenges80684 -Ref: Challenges-Footnote-189426 -Node: Some Applications and Techniques89527 -Node: PANIC91988 -Node: GETURL93714 -Node: REMCONF96347 -Node: URLCHK101843 -Node: WEBGRAB105687 -Node: STATIST110151 -Ref: STATIST-Footnote-1123300 -Node: MAZE123743 -Node: MOBAGWHO129968 -Ref: MOBAGWHO-Footnote-1143870 -Node: STOXPRED143938 -Node: PROTBASE158230 -Ref: PROTBASE-Footnote-1171397 -Node: Links171512 -Node: GNU Free Documentation License174403 -Node: Index199523 +Node: Top1138 +Node: Preface4831 +Node: Introduction6252 +Node: Stream Communications7282 +Node: Datagram Communications8466 +Node: The TCP/IP Protocols10114 +Ref: 
The TCP/IP Protocols-Footnote-110801 +Node: Basic Protocols10958 +Ref: Basic Protocols-Footnote-113023 +Node: Ports13054 +Node: Making Connections14474 +Ref: Making Connections-Footnote-117080 +Ref: Making Connections-Footnote-217127 +Node: Using Networking17317 +Node: Gawk Special Files19680 +Node: Special File Fields21541 +Ref: table-inet-components25534 +Node: Comparing Protocols26850 +Node: File /inet/tcp27384 +Node: File /inet/udp28374 +Ref: File /inet/udp-Footnote-130114 +Node: TCP Connecting30372 +Node: Troubleshooting33905 +Ref: Troubleshooting-Footnote-136697 +Node: Interacting37677 +Node: Setting Up42111 +Node: Email46750 +Ref: Email-Footnote-149196 +Node: Web page50016 +Ref: Web page-Footnote-152892 +Ref: Web page-Footnote-253090 +Node: Primitive Service53588 +Node: Interacting Service56344 +Ref: Interacting Service-Footnote-165601 +Node: CGI Lib65633 +Node: Simple Server72715 +Ref: Simple Server-Footnote-180574 +Node: Caveats80679 +Node: Challenges81841 +Ref: Challenges-Footnote-190644 +Node: Some Applications and Techniques90749 +Node: PANIC93244 +Node: GETURL94982 +Node: REMCONF97637 +Node: URLCHK103171 +Node: WEBGRAB107043 +Node: STATIST111585 +Ref: STATIST-Footnote-1124897 +Node: MAZE125352 +Node: MOBAGWHO131617 +Ref: MOBAGWHO-Footnote-1145786 +Node: STOXPRED145854 +Node: PROTBASE160247 +Ref: PROTBASE-Footnote-1173577 +Node: Links173696 +Node: GNU Free Documentation License176619 +Node: Index201942 End Tag Table diff --git a/doc/gawkinet.texi b/doc/gawkinet.texi index 324926fd..3ba14318 100644 --- a/doc/gawkinet.texi +++ b/doc/gawkinet.texi @@ -943,7 +943,7 @@ like these have been used to teach TCP/IP networking and therefore you can still find much educational material of good quality on the Internet about such outdated services. 
The @uref{https://tf.nist.gov/tf-cgi/servers.cgi, list of servers} -that still support the legacy service +that still support the legacy service @uref{https://en.wikipedia.org/wiki/Daytime_Protocol, daytime} can be found at Wikipedia. We hesitated to use this service in this manual because it is hard to find servers that still support @@ -1241,7 +1241,7 @@ example, you can see how simple it is to open up a security hole on your machine. If you allow clients to connect to your machine and execute arbitrary commands, anyone would be free to do @samp{rm -rf *}. -The client side connects to port number 8888 on the server side and +The client side connects to port number 8888 on the server side and sends the name of the desired file to be sent across the same TCP connection. The main loop reads all content coming in from the TCP connection line-wise and prints it. @@ -3002,7 +3002,7 @@ sure that none of the above reveals too much information about your system. @cindex PNG image format @cindex PS image format @cindex Boutell, Thomas -@image{statist,3in} +@image{gawk_statist,3in} In the HTTP server examples we've shown thus far, we never present an image to the browser and its user. Presenting images is one task. Generating @@ -4431,7 +4431,7 @@ They are presented in the order in which they appear. 
@item Mark Humphrys' Eliza page @uref{https://computing.dcu.ie/~humphrys/eliza.html} -@item Eliza on Wikipedia +@item Eliza on Wikipedia @uref{https://en.wikipedia.org/wiki/ELIZA} @item Java versions of Eliza with source code diff --git a/doc/gawktexi.in b/doc/gawktexi.in index 707d0758..eae07b96 100644 --- a/doc/gawktexi.in +++ b/doc/gawktexi.in @@ -45,7 +45,7 @@ @ifnottex @set TIMES * @end ifnottex - + @c Let texinfo.tex give us full section titles @xrefautomaticsectiontitle on @@ -68,7 +68,7 @@ @set TITLE GAWK: Effective AWK Programming @end ifclear @set SUBTITLE A User's Guide for GNU Awk -@set EDITION 5.2 +@set EDITION 5.3 @iftex @set DOCUMENT book @@ -5024,6 +5024,10 @@ thus reducing the need for writing complex and tedious command lines. In particular, @code{@@include} is very useful for writing CGI scripts to be run from web pages. +The @code{@@include} directive and the @option{-i}/@option{--include} +command line option are completely equivalent. An included program +source is not loaded if it has been previously loaded. + The rules for finding a source file described in @ref{AWKPATH Variable} also apply to files loaded with @code{@@include}. @@ -5238,7 +5242,7 @@ non-option argument, even if it begins with @samp{-}. @itemize @value{MINUS} @item However, when an option itself requires an argument, and the option is separated -from that argument on the command line by at least one space, the space +from that argument on the command line by at least one space, the space is ignored, and the argument is considered to be related to the option. Thus, in the invocation, @samp{gawk -F x}, the @samp{x} is treated as belonging to the @option{-F} option, not as a separate non-option argument. @@ -6120,10 +6124,10 @@ Subject: Re: [bug-gawk] Does gawk character classes follow this? 
> From: arnold@skeeve.com > Date: Fri, 15 Feb 2019 03:01:34 -0700 > Cc: pengyu.ut@gmail.com, bug-gawk@gnu.org -> +> > I get the feeling that there's something really bothering you, but > I don't understand what. -> +> > Can you clarify, please? I thought I already did: we cannot be expected to provide a definitive @@ -8673,7 +8677,7 @@ processing on the next record @emph{right now}. For example: @{ while ((start = index($0, "/*")) != 0) @{ out = substr($0, 1, start - 1) # leading part of the string - rest = substr($0, start + 2) # ... */ ... + rest = substr($0, start + 2) # ... */ ... while ((end = index(rest, "*/")) == 0) @{ # is */ in trailing part? # get more text if (getline <= 0) @{ @@ -9299,7 +9303,7 @@ on a per-command or per-connection basis. the attempt to read from the underlying device may succeed in a later attempt. This is a limitation, and it also means that you cannot use this to multiplex input from -two or more sources. @xref{Retrying Input} for a way to enable +two or more sources. @xref{Retrying Input} for a way to enable later I/O attempts to succeed. 
Assigning a timeout value prevents read operations from being @@ -11296,7 +11300,7 @@ intact, as part of the string: @example $ @kbd{nawk 'BEGIN @{ print "hello, \} > @kbd{world" @}'} -@print{} hello, +@print{} hello, @print{} world @end example @@ -16437,7 +16441,7 @@ conceptually, if the element values are eight, @code{"foo"}, @ifnotdocbook @float Figure,figure-array-elements @caption{A contiguous array} -@center @image{array-elements, , , A Contiguous Array} +@center @image{gawk_array-elements, , , A Contiguous Array} @end float @end ifnotdocbook @@ -22864,7 +22868,7 @@ $ cat @kbd{test.awk} @print{} rewound = 1 @print{} rewind() @print{} @} -@print{} +@print{} @print{} @{ print FILENAME, FNR, $0 @} $ @kbd{gawk -f rewind.awk -f test.awk data } @@ -25579,7 +25583,7 @@ exist: @example @c file eg/prog/id.awk -function fill_info_for_user(user, +function fill_info_for_user(user, pwent, fields, groupnames, grent, groups, i) @{ pwent = getpwnam(user) @@ -29589,20 +29593,20 @@ using ptys can help deal with buffering deadlocks. Suppose @command{gawk} were unable to add numbers. You could use a coprocess to do it. Here's an exceedingly -simple program written for that purpose: +simple program written for that purpose: @example $ @kbd{cat add.c} -#include <stdio.h> - -int -main(void) -@{ - int x, y; - while (scanf("%d %d", & x, & y) == 2) - printf("%d\n", x + y); - return 0; -@} +#include <stdio.h> + +int +main(void) +@{ + int x, y; + while (scanf("%d %d", & x, & y) == 2) + printf("%d\n", x + y); + return 0; +@} $ @kbd{cc -O add.c -o add} @ii{Compile the program} @end example @@ -29615,15 +29619,15 @@ $ @kbd{echo 1 2 |} @end example And it would deadlock, because @file{add.c} fails to call -@samp{setlinebuf(stdout)}. The @command{add} program freezes. +@samp{setlinebuf(stdout)}. The @command{add} program freezes. 
-Now try instead: +Now try instead: @example $ @kbd{echo 1 2 |} > @kbd{gawk -v cmd=add 'BEGIN @{ PROCINFO[cmd, "pty"] = 1 @}} > @kbd{ @{ print |& cmd; cmd |& getline x; print x @}'} -@print{} 3 +@print{} 3 @end example By using a pty, @command{gawk} fools the standard I/O library into @@ -30214,7 +30218,7 @@ Terence Kelly, the author of the persistent memory allocator @command{gawk} uses, provides the following advice about the backing file: @quotation -Regarding backing file size, I recommend making it far larger +Regarding backing file size, I recommend making it far larger than all of the data that will ever reside in it, assuming that the file system supports sparse files. The ``pay only for what you use'' aspect of sparse files ensures that the @@ -30302,8 +30306,8 @@ ACM @cite{Queue} magazine, Vol. 20 No. 2 (March/April 2022), @uref{https://dl.acm.org/doi/pdf/10.1145/3534855, PDF}, @uref{https://queue.acm.org/detail.cfm?id=3534855, HTML}. This paper explains the design of the PMA -allocator used in persistent @command{gawk}. - +allocator used in persistent @command{gawk}. + @item @cite{Persistent Scripting} Zi Fan Tan, Jianan Li, Haris Volos, and Terence Kelly, Non-Volatile Memory Workshop (NVMW) 2022, @@ -30315,7 +30319,7 @@ non-volatile memory; note that the interface differs slightly. @item @cite{Persistent Memory Programming on Conventional Hardware} Terence Kelly, ACM @cite{Queue} magazine Vol. 17 No. 4 (July/Aug 2019), -@uref{https://dl.acm.org/doi/pdf/10.1145/3358955.3358957, PDF}, +@uref{https://dl.acm.org/doi/pdf/10.1145/3358955.3358957, PDF}, @uref{https://queue.acm.org/detail.cfm?id=3358957, HTML}. This paper describes simple techniques for persistent memory for C/C++ code on conventional computers that lack non-volatile memory hardware. @@ -30325,8 +30329,8 @@ Terence Kelly, ACM @cite{Queue} magazine Vol. 18 No. 
2 (March/April 2020), @uref{https://dl.acm.org/doi/pdf/10.1145/3400899.3400902, PDF}, @uref{https://queue.acm.org/detail.cfm?id=3400902, HTML}. -This paper describes a simple and robust testbed for testing software -against real power failures. +This paper describes a simple and robust testbed for testing software +against real power failures. @item @cite{Crashproofing the Original NoSQL Key/Value Store} Terence Kelly, @@ -34557,7 +34561,7 @@ It's Euler's modification to Newton's method for calculating pi. Take a look at lines (23) - (25) here: http://mathworld.wolfram.com/PiFormulas.htm -The algorithm I wrote simply expands the multiply by 2 and works from the innermost expression outwards. I used this to program HP calculators because it's quite easy to modify for tiny memory devices with smallish word sizes. +The algorithm I wrote simply expands the multiply by 2 and works from the innermost expression outwards. I used this to program HP calculators because it's quite easy to modify for tiny memory devices with smallish word sizes. http://www.hpmuseum.org/cgi-sys/cgiwrap/hpmuseum/articles.cgi?read=899 @@ -34927,7 +34931,7 @@ This is shown in @inlineraw{docbook, <xref linkend="figure-load-extension"/>}. 
@ifnotdocbook @float Figure,figure-load-extension @caption{Loading the extension} -@center @image{api-figure1, , , Loading the extension} +@center @image{gawk_api-figure1, , , Loading the extension} @end float @end ifnotdocbook @@ -34954,7 +34958,7 @@ This is shown in @inlineraw{docbook, <xref linkend="figure-register-new-function @ifnotdocbook @float Figure,figure-register-new-function @caption{Registering a new function} -@center @image{api-figure2, , , Registering a new Function} +@center @image{gawk_api-figure2, , , Registering a new Function} @end float @end ifnotdocbook @@ -34982,7 +34986,7 @@ This is shown in @inlineraw{docbook, <xref linkend="figure-call-new-function"/>} @ifnotdocbook @float Figure,figure-call-new-function @caption{Calling the new function} -@center @image{api-figure3, , , Calling the new function} +@center @image{gawk_api-figure3, , , Calling the new function} @end float @end ifnotdocbook @@ -35915,7 +35919,7 @@ is invoked with the @option{--version} option. @cindex customized input parser By default, @command{gawk} reads text files as its input. It uses the value -of @code{RS} to find the end of the record, and then uses @code{FS} +of @code{RS} to find the end of an input record, and then uses @code{FS} (or @code{FIELDWIDTHS} or @code{FPAT}) to split it into fields (@pxref{Reading Files}). Additionally, it sets the value of @code{RT} (@pxref{Built-in Variables}). @@ -36017,13 +36021,33 @@ are as follows: The name of the file. @item int fd; -A file descriptor for the file. If @command{gawk} was able to -open the file, then @code{fd} will @emph{not} be equal to +A file descriptor for the file. @command{gawk} attempts to open +the file for reading using the @code{open()} system call. If it was +able to open the file, then @code{fd} will @emph{not} be equal to @code{INVALID_HANDLE}. Otherwise, it will. +An extension can decide that it doesn't want to use the open file descriptor +provided by @command{gawk}. 
In such a case it can close the file and +set @code{fd} to @code{INVALID_HANDLE}, or it can leave it alone and +keep its own file descriptor in private data pointed to by the +@code{opaque} pointer (see further in this list). In any case, if +the file descriptor is valid, it should @emph{not} just overwrite the +value with something else; doing so would cause a resource leak. + @item struct stat sbuf; If the file descriptor is valid, then @command{gawk} will have filled in this structure via a call to the @code{fstat()} system call. +Otherwise, if the @code{lstat()} system call is available, it will +use that. If @code{lstat()} is not available, then it uses @code{stat()}. + +Getting the file's information allows extensions to check the type of +the file even if it could not be opened. This occurs, for example, +on Windows systems when trying to use @code{open()} on a directory. + +If @command{gawk} was not able to get the file information, then +@code{sbuf} will be zeroed out. In particular, extension code +can check if @samp{sbuf.st_mode == 0}. If that's true, then there +is no information in @code{sbuf}. @end table The @code{@var{XXX}_can_take_file()} function should examine these @@ -36058,7 +36082,7 @@ This function pointer should point to a function that creates the input records. Said function is the core of the input parser. Its behavior is described in the text following this list. -@item ssize_t (*read_func)(); +@item ssize_t (*read_func)(int, void *, size_t); This function pointer should point to a function that has the same behavior as the standard POSIX @code{read()} system call. It is an alternative to the @code{get_record} pointer. Its behavior @@ -36086,12 +36110,12 @@ input records. The parameters are as follows: @item char **out This is a pointer to a @code{char *} variable that is set to point to the record. @command{gawk} makes its own copy of the data, so -the extension must manage this storage. +your extension must manage this storage. 
@item struct awk_input *iobuf -This is the @code{awk_input_buf_t} for the file. The fields should be -used for reading data (@code{fd}) and for managing private state -(@code{opaque}), if any. +This is the @code{awk_input_buf_t} for the file. Two of its fields should +be used by your extension: @code{fd} for reading data, and @code{opaque} +for managing any private state. @item int *errcode If an error occurs, @code{*errcode} should be set to an appropriate @@ -36103,7 +36127,7 @@ If the concept of a ``record terminator'' makes sense, then @code{*rt_start} should be set to point to the data to be used for @code{RT}, and @code{*rt_len} should be set to the length of the data. Otherwise, @code{*rt_len} should be set to zero. -@command{gawk} makes its own copy of this data, so the +Here too, @command{gawk} makes its own copy of this data, so your extension must manage this storage. @item const awk_fieldwidth_info_t **field_width @@ -36114,7 +36138,9 @@ field parsing mechanism. Note that this structure will not be copied by @command{gawk}; it must persist at least until the next call to @code{get_record} or @code{close_func}. Note also that @code{field_width} is @code{NULL} when @code{getline} is assigning the results to a variable, thus -field parsing is not needed. If the parser does set @code{*field_width}, +field parsing is not needed. + +If the parser sets @code{*field_width}, then @command{gawk} uses this layout to parse the input record, and the @code{PROCINFO["FS"]} value will be @code{"API"} while this record is active in @code{$0}. @@ -36168,15 +36194,7 @@ based upon the value of an @command{awk} variable, as the XML extension from the @code{gawkextlib} project does (@pxref{gawkextlib}). In the latter case, code in a @code{BEGINFILE} rule can look at @code{FILENAME} and @code{ERRNO} to decide whether or -not to activate an input parser (@pxref{BEGINFILE/ENDFILE}). 
- -You register your input parser with the following function: - -@table @code -@item void register_input_parser(awk_input_parser_t *input_parser); -Register the input parser pointed to by @code{input_parser} with -@command{gawk}. -@end table +not to activate your input parser (@pxref{BEGINFILE/ENDFILE}). If you would like to override the default field parsing mechanism for a given record, then you must populate an @code{awk_fieldwidth_info_t} structure, @@ -36201,7 +36219,7 @@ Set this to @code{awk_true} if the field lengths are specified in terms of potentially multi-byte characters, and set it to @code{awk_false} if the lengths are in terms of bytes. Performance will be better if the values are supplied in -terms of bytes. +terms of bytes. @item size_t nf; Set this to the number of fields in the input record, i.e. @code{NF}. @@ -36216,12 +36234,20 @@ for @code{$1}, and so on through the @code{fields[nf-1]} element containing the @end table A convenience macro @code{awk_fieldwidth_info_size(numfields)} is provided to -calculate the appropriate size of a variable-length +calculate the appropriate size of a variable-length @code{awk_fieldwidth_info_t} structure containing @code{numfields} fields. This can be used as an argument to @code{malloc()} or in a union to allocate space statically. Please refer to the @code{readdir_test} sample extension for an example. +You register your input parser with the following function: + +@table @code +@item void register_input_parser(awk_input_parser_t *input_parser); +Register the input parser pointed to by @code{input_parser} with +@command{gawk}. +@end table + @node Output Wrappers @subsubsection Customized Output Wrappers @cindex customized output wrapper @@ -36325,10 +36351,12 @@ what it does. The @code{@var{XXX}_can_take_file()} function should make a decision based upon the @code{name} and @code{mode} fields, and any additional state (such as @command{awk} variable values) that is appropriate. 
+@command{gawk} attempts to open the named file for writing. The @code{fp} +member will be @code{NULL} only if it fails. When @command{gawk} calls @code{@var{XXX}_take_control_of()}, that function should fill in the other fields as appropriate, except for @code{fp}, which it should just -use normally. +use normally if it's not @code{NULL}. You register your output wrapper with the following function: @@ -37583,7 +37611,7 @@ The following function allows extensions to access and manipulate redirections. Look up file @code{name} in @command{gawk}'s internal redirection table. If @code{name} is @code{NULL} or @code{name_len} is zero, return data for the currently open input file corresponding to @code{FILENAME}. -(This does not access the @code{filetype} argument, so that may be undefined). +(This does not access the @code{filetype} argument, so that may be undefined). If the file is not already open, attempt to open it. The @code{filetype} argument must be zero-terminated and should be one of: @@ -38950,22 +38978,22 @@ all the variables and functions in the @code{inplace} namespace @c endfile @ignore @c file eg/lib/inplace.awk -# +# # Copyright (C) 2013, 2017, 2019 the Free Software Foundation, Inc. -# +# # This file is part of GAWK, the GNU implementation of the # AWK Programming Language. -# +# # GAWK is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. -# +# # GAWK is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
-# +# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA @@ -40932,6 +40960,10 @@ Redirected @code{getline} became allowed inside (@pxref{BEGINFILE/ENDFILE}). @item +Support for nonfatal I/O +(@pxref{Nonfatal}). + +@item The @code{where} command was added to the debugger (@pxref{Execution Stack}). @@ -43527,7 +43559,7 @@ This is an @command{awk} interpreter written in the @uref{https://golang.org/, Go programming language}. It implements POSIX @command{awk}, with a few minor extensions. Source code is available from @uref{https://github.com/benhoyt/goawk}. -The author wrote a nice +The author wrote a nice @uref{https://benhoyt.com/writings/goawk/, article} describing the implementation. @@ -44655,7 +44687,7 @@ See @inlineraw{docbook, <xref linkend="figure-general-flow"/>}. @ifnotdocbook @float Figure,figure-general-flow @caption{General Program Flow} -@center @image{general-program, , , General program flow} +@center @image{gawk_general-program, , , General program flow} @end float @end ifnotdocbook @@ -44693,7 +44725,7 @@ as shown in @inlineraw{docbook, <xref linkend="figure-process-flow"/>}: @ifnotdocbook @float Figure,figure-process-flow @caption{Basic Program Steps} -@center @image{process-flow, , , Basic Program Stages} +@center @image{gawk_process-flow, , , Basic Program Stages} @end float @end ifnotdocbook diff --git a/doc/gawkworkflow.info b/doc/gawkworkflow.info index 8057651b..591ca0d6 100644 --- a/doc/gawkworkflow.info +++ b/doc/gawkworkflow.info @@ -1,30 +1,30 @@ -This is gawkworkflow.info, produced by makeinfo version 6.8 from +This is gawkworkflow.info, produced by makeinfo version 7.0.1 from gawkworkflow.texi. -Copyright (C) 2017, 2018, 2019, 2020, 2022 Free Software Foundation, +Copyright © 2017, 2018, 2019, 2020, 2022, 2023 Free Software Foundation, Inc. 
- This is Edition 0.74 of 'Participating in 'gawk' Development'. + This is Edition 0.75 of ‘Participating in ‘gawk’ Development’. Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with the -Invariant Sections being "GNU General Public License", with the -Front-Cover Texts being "A GNU Manual", and with the Back-Cover Texts as +Invariant Sections being “GNU General Public License”, with the +Front-Cover Texts being “A GNU Manual”, and with the Back-Cover Texts as in (a) below. A copy of the license is included in the section entitled -"GNU Free Documentation License". +“GNU Free Documentation License”. - a. The FSF's Back-Cover Text is: "You have the freedom to copy and - modify this GNU manual." + a. The FSF’s Back-Cover Text is: “You have the freedom to copy and + modify this GNU manual.” INFO-DIR-SECTION Text creation and manipulation START-INFO-DIR-ENTRY -* Gawk Work Flow: (gawkworkflow). Participating in 'gawk' development. +* Gawk Work Flow: (gawkworkflow). Participating in ‘gawk’ development. END-INFO-DIR-ENTRY INFO-DIR-SECTION Individual utilities START-INFO-DIR-ENTRY -* Gawk Work Flow: (gawkworkflow)Overview. Participating in 'gawk' development. +* Gawk Work Flow: (gawkworkflow)Overview. Participating in ‘gawk’ development. END-INFO-DIR-ENTRY @@ -34,29 +34,29 @@ General Introduction ******************** This file describes how to participate in software development for GNU -Awk ('gawk') (http://www.gnu.org/software/gawk). +Awk (‘gawk’) (http://www.gnu.org/software/gawk). - Copyright (C) 2017, 2018, 2019, 2020, 2022 Free Software Foundation, -Inc. + Copyright © 2017, 2018, 2019, 2020, 2022, 2023 Free Software +Foundation, Inc. - This is Edition 0.74 of 'Participating in 'gawk' Development'. + This is Edition 0.75 of ‘Participating in ‘gawk’ Development’. 
Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with the -Invariant Sections being "GNU General Public License", with the -Front-Cover Texts being "A GNU Manual", and with the Back-Cover Texts as +Invariant Sections being “GNU General Public License”, with the +Front-Cover Texts being “A GNU Manual”, and with the Back-Cover Texts as in (a) below. A copy of the license is included in the section entitled -"GNU Free Documentation License". +“GNU Free Documentation License”. - a. The FSF's Back-Cover Text is: "You have the freedom to copy and - modify this GNU manual." + a. The FSF’s Back-Cover Text is: “You have the freedom to copy and + modify this GNU manual.” * Menu: * Preface:: Some introductory remarks. -* Contributing:: How to contribute to 'gawk' +* Contributing:: How to contribute to ‘gawk’ development. * Using Git:: Getting started with Git. * Configuring git:: Configuring Git. @@ -65,7 +65,7 @@ in (a) below. A copy of the license is included in the section entitled * General practices:: How things should usually be done. * Repo Maintenance:: Tips for keeping your repo clean. * Development Stuff:: Things you need to know to be a - 'gawk' developer. + ‘gawk’ developer. * Cheat Sheet:: Git command summary. * Resources:: Some further resources. * TODO:: Stuff still to do. @@ -81,7 +81,7 @@ in (a) below. A copy of the license is included in the section entitled * Branches are state:: Branches represent development state. * Repo State:: The different branch types in the repo. * Local State:: Managing local branches. -* Remotes:: What a "remote" is. +* Remotes:: What a “remote” is. * Cloning:: Cloning the repo the first time. * Switching Branches:: Moving from one branch to another. * Starting A New Branch:: Starting a new branch for development. @@ -93,7 +93,7 @@ in (a) below. 
A copy of the license is included in the section entitled * Removing Branches:: Getting rid of unneeded branches. * Points to remember:: Things you need to keep in mind. * Initial setup:: Getting started with commit access. -* ssh clone:: Cloning using an 'ssh://' URL. +* ssh clone:: Cloning using an ‘ssh://’ URL. * Developing patches:: Developing patches. * Developing new features:: Developing new features. * Developing fixes:: Developing fixes. @@ -113,8 +113,8 @@ Preface ******* This Info file describes how to participate in development of GNU Awk -('gawk'). GNU Awk is a Free Software project belonging to the Free -Software Foundation's GNU project. +(‘gawk’). GNU Awk is a Free Software project belonging to the Free +Software Foundation’s GNU project. * Menu: @@ -131,7 +131,7 @@ Intended Audience ================= This Info file is aimed at software developers who wish to participate -in 'gawk' development. +in ‘gawk’ development. You should be comfortable working with traditional Unix-style command-line tools, and with the C language and standard library @@ -145,7 +145,7 @@ or Git will be even more helpful. The Info file focuses on participation in the project (that is, how to work most effectively if you wish to contribute to it) and also describes how to make use of the Git (http://git-scm.org) distributed -source code management system for 'gawk' development. +source code management system for ‘gawk’ development. File: gawkworkflow.info, Node: This Manual, Next: Conventions, Prev: Intended Audience, Up: Preface @@ -155,37 +155,37 @@ Using This Book This Info file has the following chapters and appendices: - * *note Contributing:: describes how to start contributing to the - 'gawk' project. + • *note Contributing:: describes how to start contributing to the + ‘gawk’ project. - * *note Using Git:: introduces the Git distributed source code + • *note Using Git:: introduces the Git distributed source code management system. 
- * *note Configuring git:: describes some initial set-up you need to + • *note Configuring git:: describes some initial set-up you need to do before using Git seriously. - * *note Development without commit access:: gets into the meat of the - development workflow, describing how to work if you don't have + • *note Development without commit access:: gets into the meat of the + development workflow, describing how to work if you don’t have commit access to the Savannah repository. - * *note Development with commit access:: continues the discussion, - covering what's different when you can commit directly to the + • *note Development with commit access:: continues the discussion, + covering what’s different when you can commit directly to the Savannah repository. - * *note General practices:: describes general development practices - used by the 'gawk' development team. + • *note General practices:: describes general development practices + used by the ‘gawk’ development team. - * *note Repo Maintenance:: presents several different things you need + • *note Repo Maintenance:: presents several different things you need to know about to keep your repository in good shape. - * *note Development Stuff:: describes some important points you - should be familiar with in order to participate in 'gawk' + • *note Development Stuff:: describes some important points you + should be familiar with in order to participate in ‘gawk’ development and presents some tools that may make your work easier. - * *note Cheat Sheet:: provides a short "cheat sheet" summarizing all + • *note Cheat Sheet:: provides a short “cheat sheet” summarizing all the Git commands referenced in this Info file. - * *note Resources:: provides a few pointers to Internet resources for + • *note Resources:: provides a few pointers to Internet resources for learning more about Git. @@ -201,22 +201,22 @@ printed and online versions of the documentation. 
This minor node briefly documents the typographical conventions used in Texinfo. Examples you would type at the command line are preceded by the -common shell primary and secondary prompts, '$' and '>'. Input that you -type is shown 'like this'. Output from the command is preceded by the -glyph "-|". This typically represents the command's standard output. -Error messages and other output on the command's standard error are -preceded by the glyph "error->". For example: +common shell primary and secondary prompts, ‘$’ and ‘>’. Input that you +type is shown ‘like this’. Output from the command is preceded by the +glyph “⊣”. This typically represents the command’s standard output. +Error messages and other output on the command’s standard error are +preceded by the glyph “error→”. For example: $ echo hi on stdout - -| hi on stdout + ⊣ hi on stdout $ echo hello on stderr 1>&2 - error-> hello on stderr + error→ hello on stderr - Characters that you type at the keyboard look 'like this'. In -particular, there are special characters called "control characters." -These are characters that you type by holding down both the 'CONTROL' -key and another key, at the same time. For example, a 'Ctrl-d' is typed -by first pressing and holding the 'CONTROL' key, next pressing the 'd' + Characters that you type at the keyboard look ‘like this’. In +particular, there are special characters called “control characters.” +These are characters that you type by holding down both the ‘CONTROL’ +key and another key, at the same time. For example, a ‘Ctrl-d’ is typed +by first pressing and holding the ‘CONTROL’ key, next pressing the ‘d’ key, and finally releasing both keys. NOTE: Notes of interest look like this. @@ -255,30 +255,30 @@ File: gawkworkflow.info, Node: Contributing, Next: Using Git, Prev: Preface, 1 How to Start Contributing *************************** -'gawk' development is distributed. 
It's done using electronic mail -("email") and via branches in the Git repository (or "repo") on Savannah -(http://savannah.gnu.org), the GNU project's source code management +‘gawk’ development is distributed. It’s done using electronic mail +(“email”) and via branches in the Git repository (or “repo”) on Savannah +(http://savannah.gnu.org), the GNU project’s source code management site. - In this major node we use some Git terminology. If you're not at all + In this major node we use some Git terminology. If you’re not at all familiar with Git, then skim this major node and come back after reading the rest of the Info file. - 'gawk' is similar to many other Free Software projects. To begin -contributing, simply start! Take a look at the 'TODO' file in the + ‘gawk’ is similar to many other Free Software projects. To begin +contributing, simply start! Take a look at the ‘TODO’ file in the distribution, see if there is something of interest to you, and ask on the <bug-gawk@gnu.org> mailing list if anyone else is working on it. If not, then go for it! (*Note Development Stuff:: for a discussion of -some of the technical things you'll need to do. Here we describe the +some of the technical things you’ll need to do. Here we describe the process in general.) - Your contribution can be almost anything that is relevant for 'gawk', + Your contribution can be almost anything that is relevant for ‘gawk’, such as code fixes, documentation fixes, and/or new features. - NOTE: If possible, new features should be done using 'gawk''s + NOTE: If possible, new features should be done using ‘gawk’’s extension mechanism. If you want to add a user-visible language - change to the 'gawk' core, you're going to have to convince the - maintainer and the other developers that it's really worthwhile to + change to the ‘gawk’ core, you’re going to have to convince the + maintainer and the other developers that it’s really worthwhile to do so. 
Changes that improve performance or portability, or that fix bugs, @@ -286,28 +286,28 @@ such as code fixes, documentation fixes, and/or new features. convincing, of course. As you complete a task, submit patches for review to the -<bug-gawk@gnu.org> mailing list, where you'll be given feedback about +<bug-gawk@gnu.org> mailing list, where you’ll be given feedback about your work. Once your changes are acceptable, the maintainer will commit them to the Git repository. Over time, as the maintainer and development team gain confidence in -your ability to contribute, you may be asked to join the private 'gawk' -developers' mailing list, and/or be granted commit access to the Git +your ability to contribute, you may be asked to join the private ‘gawk’ +developers’ mailing list, and/or be granted commit access to the Git repository on Savannah. This has happened to more than one person who -just "came out of the woodwork." +just “came out of the woodwork.” - Until that happens, or if you don't want to join the list, you should + Until that happens, or if you don’t want to join the list, you should continue to work with private branches and submission of patches to the mailing list. Once you have commit access, if you want to make a major change or add a major feature, where the patch(es) would be very large, it has -become the practice to create a separate branch, based off of 'master', +become the practice to create a separate branch, based off of ‘master’, to host the feature. This way the maintainer can review it, and you can -continue to improve it, until it's ready for integration into 'master'. +continue to improve it, until it’s ready for integration into ‘master’. 
- NOTE: Because of the GNU project's requirements for signed - paperwork for contributions, the 'gawk' project will *not* work + NOTE: Because of the GNU project’s requirements for signed + paperwork for contributions, the ‘gawk’ project will *not* work with pull requests from GitHub (http://github.com) or any other Git-based software hosting service. You must submit patches to the mailing list, and be willing to sign paperwork for large patches @@ -315,9 +315,9 @@ continue to improve it, until it's ready for integration into 'master'. The <bug-gawk@gnu.org> mailing list is not private. Anyone may send mail to it, and anyone may subscribe to it. To subscribe, go to the -list's web page (https://lists.gnu.org/mailman/listinfo/bug-gawk) and +list’s web page (https://lists.gnu.org/mailman/listinfo/bug-gawk) and follow the instructions there. If you plan to be involved long-term -with 'gawk' development, then you probably should subscribe to the list. +with ‘gawk’ development, then you probably should subscribe to the list. File: gawkworkflow.info, Node: Using Git, Next: Configuring git, Prev: Contributing, Up: Top @@ -329,7 +329,7 @@ This chapter provides an introduction to using Git. Our point is _not_ to rave about how wonderful Git is, nor to go into painful detail about how it works. Rather we want to give you enough background to understand how to use Git effectively for bug fix and feature -development and to interact ("play nicely") with the development team. +development and to interact (“play nicely”) with the development team. * Menu: @@ -341,11 +341,11 @@ development and to interact ("play nicely") with the development team. File: gawkworkflow.info, Node: Push Pull, Next: Repo Copies, Up: Using Git -2.1 The "Push/Pull" Model of Software Development +2.1 The “Push/Pull” Model of Software Development ================================================= Git is a powerful, distributed source code management system. 
However, -the way it's used for 'gawk' development purposely does not take +the way it’s used for ‘gawk’ development purposely does not take advantage of all its features. Instead, the model is rather simple, and in many ways much like more @@ -353,7 +353,7 @@ traditional distributed systems such as the Concurrent Versions System (http://www.nongnu.org/cvs) (CVS) or Subversion (http://subversion.apache.org) (SVN). - The central idea can be termed "push/pull." You _pull_ updates down + The central idea can be termed “push/pull.” You _pull_ updates down from the central repository to your local copy, and if you have commit rights, you _push_ your changes or updates up to the central repository. @@ -367,8 +367,8 @@ branch up to date with respect to the main development branch(es), and eventually merge the changes from your branch into the main branch. Almost always Git does these merges for you without problem. When -there is a problem (a "merge conflict"), usually it is very easy for you -to "resolve" them and then complete the merge. We talk about this in +there is a problem (a “merge conflict”), usually it is very easy for you +to “resolve” them and then complete the merge. We talk about this in more detail later (*note Merge Conflicts::). @@ -379,18 +379,18 @@ File: gawkworkflow.info, Node: Repo Copies, Next: Local Branches, Prev: Push So how does Git work?(1) - A repository consists of a collection of "branches". Each branch + A repository consists of a collection of “branches”. Each branch represents the history of a collection of files and directories (a file -"tree"). Each combined set of changes to this collection (files and +“tree”). Each combined set of changes to this collection (files and directories added or deleted, and/or file contents changed) is termed a -"commit". +“commit”. 
- When you first create a local copy of a remote repository ("clone the -repo"), Git copies all of the original repository's branches to your + When you first create a local copy of a remote repository (“clone the +repo”), Git copies all of the original repository’s branches to your local system. The original remote repository is referred to as being -"upstream", and your local repo is "downstream" from it. Git +“upstream”, and your local repo is “downstream” from it. Git distinguishes branches from the upstream repo by prefixing their names -with 'origin/'. Let's draw some pictures. *note Figure 2.1: +with ‘origin/’. Let’s draw some pictures. *note Figure 2.1: savannah-repo. represents the state of the repo on Savannah: @@ -408,21 +408,21 @@ savannah-repo. represents the state of the repo on Savannah: | ... | +----------------------+ -Figure 2.1: The Savannah 'gawk' Repository +Figure 2.1: The Savannah ‘gawk’ Repository After you clone the repo, on your local system you will have a single -branch named 'master' that's visible when you use 'git branch' to see +branch named ‘master’ that’s visible when you use ‘git branch’ to see your branches. $ git clone http://git.savannah.gnu.org/r/gawk.git Clone the repo $ cd gawk Change to local copy $ git branch See branch information - -| * master + ⊣ * master -The current branch is always indicated with a leading asterisk ('*'). +The current branch is always indicated with a leading asterisk (‘*’). Pictorially, the local repo looks like *note Figure 2.2: your-repo. -(you can ignore the 'T' column for the moment): +(you can ignore the ‘T’ column for the moment): +===+======================++=============================+ @@ -439,18 +439,18 @@ The current branch is always indicated with a leading asterisk ('*'). | | || ... 
| +---+----------------------++-----------------------------+ -Figure 2.2: Your Local 'gawk' Repository +Figure 2.2: Your Local ‘gawk’ Repository -Note that what is simply 'gawk-4.1-stable' in the upstream repo is now -referred to as 'origin/gawk-4.1-stable'. The 'origin/' branches are a +Note that what is simply ‘gawk-4.1-stable’ in the upstream repo is now +referred to as ‘origin/gawk-4.1-stable’. The ‘origin/’ branches are a snapshot of the state of the upstream repo. This is how Git allows you -to see what changes you've made with respect to the upstream repo, +to see what changes you’ve made with respect to the upstream repo, without having to actually communicate with the upstream repo over the Internet. (When files are identical, Git is smart enough to not have two separate physical copies on your local disk.) - If you're working on a simple bug fix or change, you can do so -directly in your local 'master' branch. You can then commit your + If you’re working on a simple bug fix or change, you can do so +directly in your local ‘master’ branch. You can then commit your changes, and if you have access rights, push them upstream to the Savannah repo. (However, there is a process to follow. Please read the rest of this Info file.) @@ -465,40 +465,40 @@ File: gawkworkflow.info, Node: Local Branches, Next: Branches are state, Prev 2.3 Local Branches ================== -Let's talk about local branches in more detail. (The terminology used +Let’s talk about local branches in more detail. (The terminology used here is my own, not official Git jargon.) There are two kinds of local branches: -"Tracking Branches" +“Tracking Branches” Tracking branches track branches from the upstream repository. You first create a tracking branch simply by checking out a branch from the upstream. You use the branch name without the leading - 'origin/' prefix. For example, 'git checkout gawk-4.1-stable'. + ‘origin/’ prefix. For example, ‘git checkout gawk-4.1-stable’. 
You can then work on this branch, making commits to it as you wish. - Once things are ready to move upstream, you simply use 'git push', + Once things are ready to move upstream, you simply use ‘git push’, and your changes will be pushed up to the main repo.(1) - You should *never* checkout a branch using the 'origin/' prefix. + You should *never* checkout a branch using the ‘origin/’ prefix. Things will get very confused. Always work on local tracking branches. -"Purely Local Branches" - A "purely local branch" exists only on your system. You may be +“Purely Local Branches” + A “purely local branch” exists only on your system. You may be developing some large new feature, or fixing a very difficult bug, or have a change for which paperwork has not yet been completed. In such a case, you would keep your changes on a local branch, and - periodically synchronize it with 'master' (or whichever upstream + periodically synchronize it with ‘master’ (or whichever upstream branch you started from). This may seem somewhat abstract so far. We demonstrate with commands and branches in *note Development without commit access::, later in this Info file. - Let's say you have checked out a copy of 'gawk-4.1-stable' and have -created a purely local branch named 'better-random'. Then our picture -now looks like *note Figure 2.3: your-repo-2, where the 'T' column + Let’s say you have checked out a copy of ‘gawk-4.1-stable’ and have +created a purely local branch named ‘better-random’. Then our picture +now looks like *note Figure 2.3: your-repo-2, where the ‘T’ column indicates a tracking branch. @@ -518,7 +518,7 @@ indicates a tracking branch. 
| | better-random || | +---+----------------------++-----------------------------+ -Figure 2.3: Your Local 'gawk' Repository With a Purely Local Branch +Figure 2.3: Your Local ‘gawk’ Repository With a Purely Local Branch ---------- Footnotes ---------- @@ -532,17 +532,17 @@ File: gawkworkflow.info, Node: Branches are state, Prev: Local Branches, Up: Branches represent development state. At any given time, when you checkout a particular branch (or create a new one), you have a copy of -the 'gawk' source tree that you should be able to build and test. +the ‘gawk’ source tree that you should be able to build and test. The following minor nodes describe the different branches in the -'gawk' repository and what they are for, as well as how to use your own +‘gawk’ repository and what they are for, as well as how to use your own branches. * Menu: * Repo State:: The different branch types in the repo. * Local State:: Managing local branches. -* Remotes:: What a "remote" is. +* Remotes:: What a “remote” is. File: gawkworkflow.info, Node: Repo State, Next: Local State, Up: Branches are state @@ -552,46 +552,46 @@ File: gawkworkflow.info, Node: Repo State, Next: Local State, Up: Branches ar There are several kinds of branches in the Savannah repository. -"Dead Branches" - Branches with the prefix 'dead-branches/' (such as - 'dead-branches/const') hold code that was never merged into the +“Dead Branches” + Branches with the prefix ‘dead-branches/’ (such as + ‘dead-branches/const’) hold code that was never merged into the main code base. For example, a feature which was started, but later deemed to be unwise to add. These branches keep the code available, but they are not updated. -"Stable Branches" +“Stable Branches” These branches are used for bug fixes to released versions of - 'gawk'. Sometimes new development (i.e., user-visible changes) + ‘gawk’. 
Sometimes new development (i.e., user-visible changes) also occurs on these branches, although in a perfect world they would be used only for bug fixes. - These branches have names like 'gawk-4.1-stable', - 'gawk-4.0-stable', and so on. Once a release has been made from - 'master', the previous stable branch is not updated. For example, - once 'gawk' 4.1.0 was released, no more work was done on - 'gawk-4.0-stable'. + These branches have names like ‘gawk-4.1-stable’, + ‘gawk-4.0-stable’, and so on. Once a release has been made from + ‘master’, the previous stable branch is not updated. For example, + once ‘gawk’ 4.1.0 was released, no more work was done on + ‘gawk-4.0-stable’. -"The Main Branch" - This is the 'master' branch. Here is where most new feature +“The Main Branch” + This is the ‘master’ branch. Here is where most new feature development takes place, and releases of new major versions are based off of this branch. Feature branches are typically based off this branch as well, and when the feature is deemed complete, merged back into it. -"Feature Branches" +“Feature Branches” Often, a proposed new feature or code improvement is quite - involved. It may take some time to perfect, or the 'gawk' + involved. It may take some time to perfect, or the ‘gawk’ development team may not be convinced that the feature should be kept. - For this purpose, the team uses branches prefixed with 'feature/'. + For this purpose, the team uses branches prefixed with ‘feature/’. This prefix is used even for code that simply improves the internals and does not make a user-visible change. Having large changes on separate branches makes it easier for members of the team to review the code, and also makes it easier to - keep the changes up-to-date with respect to 'master', since Git + keep the changes up-to-date with respect to ‘master’, since Git excels at merging commits from one branch to another. 
@@ -601,9 +601,9 @@ File: gawkworkflow.info, Node: Local State, Next: Remotes, Prev: Repo State, --------------------------------------- Purely local branches are where you do your own development. You may -use purely local branches because you don't have commit rights to the +use purely local branches because you don’t have commit rights to the Savannah repo. You may also use them if you are doing some work that -isn't ready for sharing with the rest of the team, or cannot be +isn’t ready for sharing with the rest of the team, or cannot be committed for some other reason. For example, for around a nine-month period, the maintainer kept a @@ -618,30 +618,30 @@ File: gawkworkflow.info, Node: Remotes, Prev: Local State, Up: Branches are s Earlier, we said that Git maintains copies of the branches in the upstream repo, as well as manages your local branches. You can see all -these branches with 'git branch -a': +these branches with ‘git branch -a’: $ git branch -a - -| gawk-4.1-stable - -| * master - -| remotes/origin/HEAD -> origin/master - -| remotes/origin/dead-branches/async-events - -| ... - -| remotes/origin/feature/api-mpfr - -| remotes/origin/feature/array-iface - -| remotes/origin/feature/fix-comments - -| ... - - You'll note that what we've referred to as 'origin/' branches appear -in the output with an additional prefix: 'remotes/'. Up to this point, -we've treated Git as if it allowed only a single upstream repository. + ⊣ gawk-4.1-stable + ⊣ * master + ⊣ remotes/origin/HEAD -> origin/master + ⊣ remotes/origin/dead-branches/async-events + ⊣ ... + ⊣ remotes/origin/feature/api-mpfr + ⊣ remotes/origin/feature/array-iface + ⊣ remotes/origin/feature/fix-comments + ⊣ ... + + You’ll note that what we’ve referred to as ‘origin/’ branches appear +in the output with an additional prefix: ‘remotes/’. Up to this point, +we’ve treated Git as if it allowed only a single upstream repository. But in fact, you can configure it to use more than one. 
All the known -upstream repositories are grouped under the 'remotes/' prefix, with -'remotes/origin' being the one from which you initially cloned your +upstream repositories are grouped under the ‘remotes/’ prefix, with +‘remotes/origin’ being the one from which you initially cloned your local repository. The ability to work with multiple upstream repositories is an -advanced one; 'gawk' development does not make use of it. The intent of -this node is to explain the output from 'git branch -a', nothing more. +advanced one; ‘gawk’ development does not make use of it. The intent of +this node is to explain the output from ‘git branch -a’, nothing more. File: gawkworkflow.info, Node: Configuring git, Next: Development without commit access, Prev: Using Git, Up: Top @@ -650,11 +650,11 @@ File: gawkworkflow.info, Node: Configuring git, Next: Development without comm ************************************* Before starting to use Git, you should configure it with some important -settings that won't change as you use Git. You may configure options +settings that won’t change as you use Git. You may configure options both globally, and on a per-repository basis. Here, we discuss only global configuration settings. - You can configure Git using either 'git config', or by editing the + You can configure Git using either ‘git config’, or by editing the relevant files with your favorite text editor.(1) The first things to set are your email address and your real name: @@ -663,14 +663,14 @@ relevant files with your favorite text editor.(1) $ git config --global user.email jpdev@example.com Set email address Setting these two items is an absolute requirement. *Note*: No -aliases are allowed. If you can't supply your real name, you cannot -contribute to the project. +aliases are allowed. If you can’t supply your real name, you cannot +contribute to the project.
Other options that the ‘gawk’ maintainer recommends that you use are: $ git config --global push.default simple Only push the current branch $ git config --global pager.status true Use pager for output of git status - The global settings are stored in the '.gitconfig' file in your home + The global settings are stored in the ‘.gitconfig’ file in your home directory. The file looks like this: [user] @@ -681,37 +681,37 @@ directory. The file looks like this: [pager] status = true - The 'push.default=simple' setting ensures that older versions of Git + The ‘push.default=simple’ setting ensures that older versions of Git only push the current branch up to the Savannah repo. This is the safest way to operate, and is the default in current Git versions. There may be other settings in your configuration file as well. Use -'git config' to see your settings: +‘git config’ to see your settings: $ git config --list - -| user.name=J. P. Developer - -| user.email=jpdev@example.com - -| push.default=simple + ⊣ user.name=J. P. Developer + ⊣ user.email=jpdev@example.com + ⊣ push.default=simple - Here are the 'gawk' maintainer's settings: + Here are the ‘gawk’ maintainer’s settings: $ git config --global --list - -| user.name=Arnold D. Robbins - -| user.email=arnold@... - -| credential.helper=cache --timeout=3600 - -| push.default=simple - -| color.ui=false - -| core.autocrlf=input - -| pager.status=true - -| log.decorate=auto - - Additional, per-project ("local") settings are stored in each repo's -'.git/config' file. + ⊣ user.name=Arnold D. Robbins + ⊣ user.email=arnold@... + ⊣ credential.helper=cache --timeout=3600 + ⊣ push.default=simple + ⊣ color.ui=false + ⊣ core.autocrlf=input + ⊣ pager.status=true + ⊣ log.decorate=auto + + Additional, per-project (“local”) settings are stored in each repo’s +‘.git/config’ file. ---------- Footnotes ---------- (1) You are required to use either Vim or Emacs, other text editors -are not allowed. 
Of course, reasonable developers wouldn't want to use +are not allowed. Of course, reasonable developers wouldn’t want to use any other editor anyway. @@ -721,7 +721,7 @@ File: gawkworkflow.info, Node: Development without commit access, Next: Develo *********************************** In this chapter we present step-by-step recipes for checking out and -working with a local copy of the Savannah Git repo for 'gawk'. The +working with a local copy of the Savannah Git repo for ‘gawk’. The presentation is for when you do not have commit access to the Git repo, and so you cannot push your changes directly. @@ -743,15 +743,15 @@ File: gawkworkflow.info, Node: Cloning, Next: Switching Branches, Up: Develop 4.1 Cloning The Repo ==================== -Clone the Savannah repo using 'git clone'. You should do so using using +Clone the Savannah repo using ‘git clone’. You should do so using the HTTPS protocol; HTTPS is considered to be more secure than the native Git protocol and is preferred.(1) - To choose which method, you supply a "URL" for the repo when you + To choose which method, you supply a “URL” for the repo when you clone it, as follows. $ git clone https://git.savannah.gnu.org/r/gawk.git Clone the repo - -| ... + ⊣ ... $ cd gawk Start working _You only need to clone the repo once._ From then on, you update its @@ -760,7 +760,7 @@ your vacation in the Bahamas: $ cd gawk Move to the repo $ make distclean A good idea before updating - -| ... + ⊣ ... $ git pull Update it To build, you should generally follow this recipe: @@ -768,11 +768,11 @@ your vacation in the Bahamas: $ ./bootstrap.sh && ./configure && make -j && make check NOTE: Unless you have installed all the tools described in *note - GNU Tools::, you _must_ run './bootstrap.sh' every time you clone a - repo, do a 'git pull' or checkout a different branch. (In the - latter case, do 'make distclean' first.) Otherwise things will get - messy very quickly.
The 'bootstrap.sh' script ensures that all of - the file time stamps are up to date so that it's not necessary to + GNU Tools::, you _must_ run ‘./bootstrap.sh’ every time you clone a + repo, do a ‘git pull’ or checkout a different branch. (In the + latter case, do ‘make distclean’ first.) Otherwise things will get + messy very quickly. The ‘bootstrap.sh’ script ensures that all of + the file time stamps are up to date so that it’s not necessary to run the various configuration tools. ---------- Footnotes ---------- @@ -785,14 +785,14 @@ File: gawkworkflow.info, Node: Switching Branches, Next: Starting A New Branch 4.2 Switching Branches ====================== -So far, we've been working in the default 'master' branch. Let's check -what's happening in the 'gawk-4.1-stable' branch: +So far, we’ve been working in the default ‘master’ branch. Let’s check +what’s happening in the ‘gawk-4.1-stable’ branch: $ make distclean Clean up $ git checkout gawk-4.1-stable Checkout a different branch - -| ... + ⊣ ... $ git pull Get up to date - -| ... + ⊣ ... $ ./bootstrap.sh && ./configure && Start working > make -j && make check @@ -802,61 +802,61 @@ File: gawkworkflow.info, Node: Starting A New Branch, Next: Undoing a change, 4.3 Starting A New Branch ========================= -Let's say you want to work on a new feature. For example, you might +Let’s say you want to work on a new feature. For example, you might decide to add Python syntax support.(1) You should create a new branch -on which to work. First, switch back to 'master': +on which to work. First, switch back to ‘master’: $ make distclean $ git checkout master Now, create a new branch. The easiest way to do that is with the -'-b' option to 'git checkout': +‘-b’ option to ‘git checkout’: $ git checkout -b feature/python - -| ... + ⊣ ... You now do massive amounts of work in order to add Python syntax support. 
As you do each defined chunk of work, you update the -'ChangeLog' file with your changes before "committing" them to the repo. +‘ChangeLog’ file with your changes before “committing” them to the repo. - Let's say you've added a new file 'python.c' and updated several -others. Use 'git status' to see what's changed: + Let’s say you’ve added a new file ‘python.c’ and updated several +others. Use ‘git status’ to see what’s changed: $ git status - -| ... + ⊣ ... - Before committing the current set of changes, you can use 'git diff' -to view the changes. You may also use 'git difftool'(2) to run an -external 'diff' command, such as 'meld' on GNU/Linux: + Before committing the current set of changes, you can use ‘git diff’ +to view the changes. You may also use ‘git difftool’(2) to run an +external ‘diff’ command, such as ‘meld’ on GNU/Linux: $ git diff Regular built-in tool for standard diffs $ git difftool --tool=meld GUI diff tool - When you're happy with the changes, use 'git add' to tell Git which + When you’re happy with the changes, use ‘git add’ to tell Git which of the changed and/or new files you wish to have ready to be committed: $ git add ... - Use 'git status' to see that your changes are scheduled for + Use ‘git status’ to see that your changes are scheduled for committing: $ git status - -| + ⊣ Now you can commit your changes to your branch: $ git commit -Running 'git commit' causes Git to invoke an editor (typically from the -'$EDITOR' environment variable) in which you can compose a commit +Running ‘git commit’ causes Git to invoke an editor (typically from the +‘$EDITOR’ environment variable) in which you can compose a commit message. Please supply a short message summarizing the commit. This -message will be visible via 'git log'. +message will be visible via ‘git log’. ---------- Footnotes ---------- - (1) Just joking. Please don't attempt this for real. + (1) Just joking. Please don’t attempt this for real. 
- (2) Don't run 'git difftool' in the background; it works + (2) Don’t run ‘git difftool’ in the background; it works interactively. @@ -871,7 +871,7 @@ checking out the file again: $ git checkout awkgram.y Undo changes to awkgram.y. There is no output - To start over completely, use 'git reset --hard'. Note that this + To start over completely, use ‘git reset --hard’. Note that this will _throw away_ all your changes, with no chance for recovery, so be sure you really want to do it. @@ -882,14 +882,14 @@ File: gawkworkflow.info, Node: Saving Without Committing, Next: Updating, Pre ======================== Sometimes, you may be in the middle of a set of changes that are not yet -completed, when you need to stop what you're doing and work on something +completed, when you need to stop what you’re doing and work on something else. For example, you might be updating the documentation when a bug -report comes in and you want to work on the bug. But you can't just -switch branches, since you haven't finished your current changes. +report comes in and you want to work on the bug. But you can’t just +switch branches, since you haven’t finished your current changes. - The way to work around this problem is with 'git stash'. This + The way to work around this problem is with ‘git stash’. This command saves your changes in a special place within Git from which they -may be restored later. After executing 'git stash', your current branch +may be restored later. After executing ‘git stash’, your current branch is restored to its original, pristine state. The workflow might go something like this: @@ -903,8 +903,8 @@ is restored to its original, pristine state. $ git stash pop Restore the earlier changes The stash is maintained as a stack. Sets of changes are pushed onto -the stack by 'git stash' and popped off of it with 'git stash pop'. You -may use 'git stash list' to see the list of saved changes. +the stack by ‘git stash’ and popped off of it with ‘git stash pop’. 
You +may use ‘git stash list’ to see the list of saved changes. File: gawkworkflow.info, Node: Updating, Next: Submitting Changes, Prev: Saving Without Committing, Up: Development without commit access @@ -913,7 +913,7 @@ File: gawkworkflow.info, Node: Updating, Next: Submitting Changes, Prev: Savi ======================== As you work on your branch, you will occasionally want to bring it up to -date with respect to 'master'. This minor node discusses updating local +date with respect to ‘master’. This minor node discusses updating local branches and handling merge conflicts. * Menu: @@ -928,13 +928,13 @@ File: gawkworkflow.info, Node: Rebasing, Next: Merge Conflicts, Up: Updating ----------------------------- For purely local branches, bringing your branch up to date is called -"rebasing", which causes the branch to look _as if_ you had started from -the latest version of 'master'. The steps are as follows: +“rebasing”, which causes the branch to look _as if_ you had started from +the latest version of ‘master’. The steps are as follows: $ git checkout master Checkout master $ git pull Update it $ git checkout feature/python Move back to new, purely local branch - $ git rebase master "Start over" from current master + $ git rebase master ``Start over'' from current master File: gawkworkflow.info, Node: Merge Conflicts, Prev: Rebasing, Up: Updating @@ -942,45 +942,45 @@ File: gawkworkflow.info, Node: Merge Conflicts, Prev: Rebasing, Up: Updating 4.6.2 Dealing With Merge Conflicts ---------------------------------- -Sometimes, when merging from 'master' into your branch, or from a branch -into 'master', there will be "merge conflicts". These are one or more +Sometimes, when merging from ‘master’ into your branch, or from a branch +into ‘master’, there will be “merge conflicts”. These are one or more areas within a file where there are conflicting sets of changes, and Git could not do the merge for you. 
In this case, the conflicted area will -be delimited by the traditional conflict markers, '<<<', '===' and -'>>>'. +be delimited by the traditional conflict markers, ‘<<<’, ‘===’ and +‘>>>’. - Your mission then is to edit the file and "resolve" the conflict by -fixing the order of additions (such as in a 'ChangeLog' file), or fixing + Your mission then is to edit the file and “resolve” the conflict by +fixing the order of additions (such as in a ‘ChangeLog’ file), or fixing the code to take new changes into account. - Once you have done so, you tell Git that everything is OK using 'git -add' and 'git commit': + Once you have done so, you tell Git that everything is OK using ‘git +add’ and ‘git commit’: $ git checkout feature/python Move back to new, purely local branch - $ git rebase master "Start over" from current master - -| First, rewinding head to replay your work on top of it... - -| Applying: Demo change. - -| Using index info to reconstruct a base tree... - -| M main.c - -| Falling back to patching base and 3-way merge... - -| Auto-merging main.c - -| CONFLICT (content): Merge conflict in main.c - -| error: Failed to merge in the changes. - -| Patch failed at 0001 Demo change. - -| Use 'git am --show-current-patch' to see the failed patch - -| - -| Resolve all conflicts manually, mark them as resolved with - -| "git add/rm <conflicted_files>", then run "git rebase --continue". - -| You can instead skip this commit: run "git rebase --skip". - -| To abort and get back to the state before "git rebase", run "git rebase --abort". + $ git rebase master ``Start over'' from current master + ⊣ First, rewinding head to replay your work on top of it... + ⊣ Applying: Demo change. + ⊣ Using index info to reconstruct a base tree... + ⊣ M main.c + ⊣ Falling back to patching base and 3-way merge... + ⊣ Auto-merging main.c + ⊣ CONFLICT (content): Merge conflict in main.c + ⊣ error: Failed to merge in the changes. + ⊣ Patch failed at 0001 Demo change. 
+ ⊣ Use 'git am --show-current-patch' to see the failed patch + ⊣ + ⊣ Resolve all conflicts manually, mark them as resolved with + ⊣ "git add/rm <conflicted_files>", then run "git rebase --continue". + ⊣ You can instead skip this commit: run "git rebase --skip". + ⊣ To abort and get back to the state before "git rebase", run "git rebase --abort". $ gvim main.c Edit the file and fix the problem $ git add main.c Tell Git everything is OK now ... $ git commit ... and it's settled $ git rebase --continue Continue the rebase - The 'git rebase --continue' then continues the process of rebasing -the current branch that we started in *note Rebasing::. It's not -necessary if you are using 'git merge' (*note Points to remember::). + The ‘git rebase --continue’ then continues the process of rebasing +the current branch that we started in *note Rebasing::. It’s not +necessary if you are using ‘git merge’ (*note Points to remember::). File: gawkworkflow.info, Node: Submitting Changes, Next: Removing Branches, Prev: Updating, Up: Development without commit access @@ -988,11 +988,11 @@ File: gawkworkflow.info, Node: Submitting Changes, Next: Removing Branches, P 4.7 Submitting Your Changes =========================== -So now your feature is complete. You've added test cases for it to the -test suite(1), you have 'ChangeLog' entries that describe all the +So now your feature is complete. You’ve added test cases for it to the +test suite(1), you have ‘ChangeLog’ entries that describe all the changes(2), you have documented the new feature(3), and everything works -great. You're ready to submit the changes for review, and with any -luck, inclusion into 'gawk'. +great. You’re ready to submit the changes for review, and with any +luck, inclusion into ‘gawk’. There are two ways to submit your changes for review. 
@@ -1003,29 +1003,34 @@ _Generate a single large patch_ $ git checkout feature/python $ git diff master > /tmp/python.diff - Mail the 'python.diff' file to the appropriate mailing list along - with a description of what you've changed and why. + Mail the ‘python.diff’ file to the appropriate mailing list along + with a description of what you’ve changed and why. + + The patch file will likely contain changes to generated files, such + as ‘awkgram.c’ or ‘Makefile.in’. If you are comfortable manually + editing the patch file to remove those changes, do so. If not, + then send the file as-is and the maintainer will handle it. _Generate a set of patches that in toto comprise your changes_ - To do this, use 'git format-patch': + To do this, use ‘git format-patch’: $ git checkout feature/python $ git format-patch - This creates a set of patch files, one per commit that isn't on the + This creates a set of patch files, one per commit that isn’t on the original branch. Mail these patches, either separately, or as a set of attachments, to the appropriate mailing list along with a - description of what you've changed and why. + description of what you’ve changed and why. - Either way you choose to submit your changes, the 'gawk' maintainer + Either way you choose to submit your changes, the ‘gawk’ maintainer and development team will review your changes and provide feedback. If -you have signed paperwork with the FSF for 'gawk' and the maintainer +you have signed paperwork with the FSF for ‘gawk’ and the maintainer approves your changes, he will apply the patch(es) and commit the changes. Which list should you send mail to? If you are just starting to contribute, use <bug-gawk@gnu.org>. After making enough contributions, -you may be invited to join the private 'gawk' developers' mailing list. +you may be invited to join the private ‘gawk’ developers’ mailing list. If you do so, then submit your changes to that list. 
If you make any substantial changes, you will need to assign @@ -1035,11 +1040,11 @@ information. ---------- Footnotes ---------- - (1) You did do this, didn't you? + (1) You did do this, didn’t you? (2) You remembered this, right? - (3) You wouldn't neglect this, would you? + (3) You wouldn’t neglect this, would you? File: gawkworkflow.info, Node: Removing Branches, Next: Points to remember, Prev: Submitting Changes, Up: Development without commit access @@ -1063,14 +1068,14 @@ File: gawkworkflow.info, Node: Points to remember, Prev: Removing Branches, U There are some important points to remember: - * Always do a 'make distclean' before switching between branches. - Things will get really confused if you don't. + • Always do a ‘make distclean’ before switching between branches. + Things will get really confused if you don’t. - * For upstream branches, _always_ work with tracking branches. - _Never_ use 'git checkout origin/WHATEVER'. Git will happily let - you do something like that, but it's just plain asking for trouble. + • For upstream branches, _always_ work with tracking branches. + _Never_ use ‘git checkout origin/WHATEVER’. Git will happily let + you do something like that, but it’s just plain asking for trouble. - * Make sure your tracking branches are up-to-date before doing + • Make sure your tracking branches are up-to-date before doing anything with them, particularly using them as the basis for a rebase or merge. This typically means a three-step process: @@ -1082,10 +1087,10 @@ There are some important points to remember: $ git rebase master Now rebase your feature off of master - * Git always treats the currently checked-out branch as the object of + • Git always treats the currently checked-out branch as the object of operations. For example, when comparing files with the regular - 'diff' command, the usage is 'diff OLDFILE NEWFILE'. 
For 'git - diff', the current branch takes the place of NEWFILE, thus: + ‘diff’ command, the usage is ‘diff OLDFILE NEWFILE’. For ‘git + diff’, the current branch takes the place of NEWFILE, thus: $ git checkout feature/python $ git diff master Compare master to current branch @@ -1103,12 +1108,12 @@ File: gawkworkflow.info, Node: Development with commit access, Next: General p ******************************** This major node describes how to do development when you _do_ have -commit access to the 'gawk' repo on Savannah. +commit access to the ‘gawk’ repo on Savannah. * Menu: * Initial setup:: Getting started with commit access. -* ssh clone:: Cloning using an 'ssh://' URL. +* ssh clone:: Cloning using an ‘ssh://’ URL. * Developing patches:: Developing patches. * Developing new features:: Developing new features. * Developing fixes:: Developing fixes. @@ -1119,34 +1124,34 @@ File: gawkworkflow.info, Node: Initial setup, Next: ssh clone, Up: Developmen 5.1 Initial Setup ================= -Congratulations! After becoming a quality contributor to 'gawk' -development, you've been invited to join the private development list +Congratulations! After becoming a quality contributor to ‘gawk’ +development, you’ve been invited to join the private development list and to accept having commit access to the repo. The first thing to do is to create an account on Savannah, choosing a unique user name. To do so, go to the Savannah home page -(http://savannah.gnu.org) and click on the "New User" link. The setup -will include uploading of your 'ssh' key, as per the instructions on the +(http://savannah.gnu.org) and click on the “New User” link. The setup +will include uploading of your ‘ssh’ key, as per the instructions on the Savannah web page. - After you've done all this, send email to the maintainer with your + After you’ve done all this, send email to the maintainer with your Savannah user name, and he will add you to the list of users who have commit access to the repo. 
File: gawkworkflow.info, Node: ssh clone, Next: Developing patches, Prev: Initial setup, Up: Development with commit access -5.2 Cloning The Repo With An 'ssh' URL +5.2 Cloning The Repo With An ‘ssh’ URL ====================================== In order to be able to commit changes to the repo, you must clone it -using an 'ssh://' URL. Cloning the repo with 'ssh' is similar to cloning +using an ‘ssh://’ URL. Cloning the repo with ‘ssh’ is similar to cloning with HTTPS, but the URL is different: $ git clone ssh://yourname@git.sv.gnu.org/srv/git/gawk.git - -| ... + ⊣ ... - Here, you should replace 'yourname' in the command with the user name + Here, you should replace ‘yourname’ in the command with the user name you chose for use on Savannah. @@ -1160,20 +1165,21 @@ without commit access: 1. Develop the code and test it. - 2. Update the 'ChangeLog'. + 2. Update the ‘ChangeLog’. - 3. If necessary, update the documentation: 'doc/gawktexi.in' and/or - 'doc/gawk.1'. + 3. If necessary, update the documentation: ‘doc/gawktexi.in’ and/or + ‘doc/gawk.1’. - 4. Use 'git diff > mychange.diff' to create a patch file. + 4. Use ‘git diff > mychange.diff’ to create a patch file. (If you + want, remove the diffs for generated files.) 5. Send it to the mailing list for discussion. 6. Iterate until the patch is ready to be committed. However, now that you have commit access, you can commit the fix and -push it up to the repo yourself! Let's assume you've made a bug fix -directly on 'master'. Here's how to commit your changes: +push it up to the repo yourself! Let’s assume you’ve made a bug fix +directly on ‘master’. Here’s how to commit your changes: $ git diff Review the patch one more time $ git add ... Add any files for committing @@ -1181,7 +1187,7 @@ directly on 'master'. Here's how to commit your changes: $ git push Push the files up to the repo. Ta da! The first three steps are the same described earlier (*note Starting -A New Branch::). 
The 'git push' is what's new, and it updates the repo +A New Branch::). The ‘git push’ is what’s new, and it updates the repo on Savannah. Congratulations! As a courtesy, you should send a note to the mailing list indicating @@ -1201,7 +1207,7 @@ the repo. First, create a new branch to hold your feature: $ git checkout -b feature/python Create and switch to a new branch Now, you can develop as normal, adding new files if necessary (such -as new tests), modifying code, updating the 'ChangeLog' and +as new tests), modifying code, updating the ‘ChangeLog’ and documentation, and so on. You can share changes with the mailing list as diffs, as usual. @@ -1216,11 +1222,11 @@ their local systems and review your changes at their leisure. $ git commit Commit the files with a commit message $ git push -u origin feature/python Push the branch up to the repo - When you use 'push -u origin', Git helpfully converts your purely + When you use ‘push -u origin’, Git helpfully converts your purely local branch into a tracking branch. It becomes as if the branch had originated from the upstream repo and you checked it out locally. - _You only need to do 'git push -u origin' once._ As you continue to + _You only need to do ‘git push -u origin’ once._ As you continue to work on your branch, the workflow simplifies into this: $ git diff Review your changes @@ -1234,9 +1240,9 @@ File: gawkworkflow.info, Node: Developing fixes, Prev: Developing new features 5.5 Developing Fixes ==================== -If you want to make a fix on 'master' or on the current stable branch, +If you want to make a fix on ‘master’ or on the current stable branch, you work the same way, by producing and discussing a diff on the mailing -list. Once it's approved, you can commit it yourself: +list. Once it’s approved, you can commit it yourself: $ git checkout master Move to master $ git pull Make sure we're up to date with the maintainer @@ -1245,7 +1251,7 @@ list. 
Once it's approved, you can commit it yourself: $ git add ... Add any files for committing $ git commit Commit the files with a commit message. - When you're ready to push your changes: + When you’re ready to push your changes: $ git pull Download latest version; Git will merge $ gvim ... Resolve any merge conflicts with git add and git commit @@ -1260,41 +1266,41 @@ File: gawkworkflow.info, Node: General practices, Next: Repo Maintenance, Pre 6 General Development Practices ******************************* -This major node discusses general practices for 'gawk' development. The +This major node discusses general practices for ‘gawk’ development. The discussion here is mainly for developers with commit access to the Savannah repo. -"Propagating Fixes" - Usually, bug fixes should be made on the current "stable" branch. +“Propagating Fixes” + Usually, bug fixes should be made on the current “stable” branch. Once a fix has been reviewed and approved, you can commit it and push it yourself. Typically, the maintainer then takes care to - merge the fix to 'master' and from there to any other branches. + merge the fix to ‘master’ and from there to any other branches. However, you are welcome to save him the time and do this yourself. -"Directory ownership" - Some developers "own" certain parts of the tree, such as the 'pc' - and 'vms' directories. They are allowed to commit changes to those +“Directory ownership” + Some developers “own” certain parts of the tree, such as the ‘pc’ + and ‘vms’ directories. They are allowed to commit changes to those directories without review by the mailing list, but changes that also touch the mainline code should be submitted for review. -"New feature development" +“New feature development” Unless you can convince the maintainer (and the other developers!) otherwise, you should _always_ start branches for new features from - 'master', and not from the current "stable" branch. + ‘master’, and not from the current “stable” branch. 
- Use 'checkout -b feature/FEATURE_NAME' to create the initial + Use ‘git checkout -b feature/FEATURE_NAME’ to create the initial branch. You may then elect to keep it purely local, or to push it up to Savannah for review, even if the feature is not yet totally - "ready for prime time." + “ready for prime time.” During development of a new feature, you will most likely wish to keep your feature branch up to date with respect to ongoing improvements -in 'master'. This is generally easy to do. There are two different +in ‘master’. This is generally easy to do. There are two different mechanisms, and which one you use depends upon the nature of your new feature branch. -"As long as your branch is purely local" - You should use 'git rebase' to the keep the branch synchronized +“As long as your branch is purely local” + You should use ‘git rebase’ to keep the branch synchronized with the original branch from which it was forked: $ git checkout master Move to master @@ -1304,14 +1310,14 @@ feature branch. The rebasing operation may require that you resolve conflicts (*note Merge Conflicts::). Edit any conflicted files and resolve - the problem(s). Compile and test your changes, then use 'git add' - and 'git commit' to indicate resolution, and then use 'git rebase - --continue' to continue the rebasing. Git is very good about + the problem(s). Compile and test your changes, then use ‘git add’ + and ‘git commit’ to indicate resolution, and then use ‘git rebase + --continue’ to continue the rebasing. Git is very good about providing short instructions on how to continue when such conflicts occur. -"Once the branch has been pushed up to Savannah" - You _must_ use 'git merge' to bring your feature branch up to date. +“Once the branch has been pushed up to Savannah” + You _must_ use ‘git merge’ to bring your feature branch up to date. That flow looks like this: $ git checkout master Move to master @@ -1320,11 +1326,11 @@ feature branch.
$ git merge master Merge from master Here too, you may have to resolve any merge conflicts (*note Merge - Conflicts::). Once that's done, you can push the changes up to + Conflicts::). Once that’s done, you can push the changes up to Savannah. When the changes on your branch are complete, usually the maintainer -merges the branch to 'master'. But there's really no magic involved, +merges the branch to ‘master’. But there’s really no magic involved, the merge is simply done in the other direction: $ git checkout feature/python Checkout feature branch @@ -1333,13 +1339,13 @@ the merge is simply done in the other direction: $ git pull Bring it up to date $ git merge feature/python Merge from feature/python into master - If you've been keeping 'feature/python' in sync with 'master', then + If you’ve been keeping ‘feature/python’ in sync with ‘master’, then there should be no merge conflicts to resolve, and you can push the result to Savannah: $ git push Push up to Savannah - Since 'feature/python' is no longer needed, it can be gotten rid of: + Since ‘feature/python’ is no longer needed, it can be gotten rid of: $ git branch Still on master ... @@ -1347,15 +1353,15 @@ result to Savannah: $ git branch -d feature/python Delete feature branch $ git push -u origin --delete feature/python Delete on Savannah - The 'git push' command deletes the 'feature/python' branch from the + The ‘git push’ command deletes the ‘feature/python’ branch from the Savannah repo. -Finally, you should send an email to developer's list describing what -you've done so that everyone else can delete their copies of the branch -and do a 'git fetch --prune' (*note Repo Maintenance::). +Finally, you should send an email to developer’s list describing what +you’ve done so that everyone else can delete their copies of the branch +and do a ‘git fetch --prune’ (*note Repo Maintenance::). 
To update the other remaining development branches with the latest -changes on 'master', use the 'helpers/update-branches.sh' script in the +changes on ‘master’, use the ‘helpers/update-branches.sh’ script in the repo. @@ -1369,11 +1375,11 @@ repo clean. _Removing old branches_ Developers add branches to the Savannah repo and when development - on them is done, they get merged into 'master'. Then the branches + on them is done, they get merged into ‘master’. Then the branches on Savannah are deleted (as shown in *note General practices::). However, your local copies of those branches (labelled with the - 'origin/' prefix) remain in your local repo. If you don't need + ‘origin/’ prefix) remain in your local repo. If you don’t need them, then you can clean up your repo as follows. First, remove any related tracking branch you may have: @@ -1386,21 +1392,21 @@ _Removing old branches_ $ git fetch --prune Remove unneeded branches _Removing cruft_ - As Git works, occasional "cruft" collects in the repository. Git - does occasionally clean this out on its own, but if you're - concerned about disk usage, you can do so yourself using 'git gc' - (short for "garbage collect"). For example: + As Git works, occasional “cruft” collects in the repository. Git + does occasionally clean this out on its own, but if you’re + concerned about disk usage, you can do so yourself using ‘git gc’ + (short for “garbage collect”). For example: $ du -s . Check disk usage - -| 99188 . Almost 10 megabytes + ⊣ 99188 . Almost 10 megabytes $ git gc Collect garbage - -| Counting objects: 32114, done. - -| Delta compression using up to 4 threads. - -| Compressing objects: 100% (6370/6370), done. - -| Writing objects: 100% (32114/32114), done. - -| Total 32114 (delta 25655), reused 31525 (delta 25231) + ⊣ Counting objects: 32114, done. + ⊣ Delta compression using up to 4 threads. + ⊣ Compressing objects: 100% (6370/6370), done. + ⊣ Writing objects: 100% (32114/32114), done. 
+ ⊣ Total 32114 (delta 25655), reused 31525 (delta 25231) $ du -s . Check disk usage again - -| 75168 . Down to 7 megabytes + ⊣ 75168 . Down to 7 megabytes _Renaming branches_ Occasionally you may want to rename a branch.(1) If your branch is @@ -1418,8 +1424,8 @@ _Renaming branches_ $ git push origin :feature/OLD-NAME feature/NEW-NAME - NOTE: It is the leading ':' in the first branch name that - causes Git to delete the old name in the upstream repo. Don't + NOTE: It is the leading ‘:’ in the first branch name that + causes Git to delete the old name in the upstream repo. Don’t omit it! Finally, reset the upstream branch for the local branch with the @@ -1428,7 +1434,7 @@ _Renaming branches_ $ git push -u origin feature/NEW-NAME You should also update the mailing list to let the other developers - know what's happening. + know what’s happening. ---------- Footnotes ---------- @@ -1442,7 +1448,7 @@ File: gawkworkflow.info, Node: Development Stuff, Next: Cheat Sheet, Prev: Re ******************* This major node discusses other things you need to know and/or do if -you're going to participate seriously in 'gawk' development. +you’re going to participate seriously in ‘gawk’ development. * Menu: @@ -1457,7 +1463,7 @@ File: gawkworkflow.info, Node: Coding style, Next: Doing paperwork, Up: Devel 8.1 Coding Style ================ -You should read the discussion about adding code in the 'gawk' +You should read the discussion about adding code in the ‘gawk’ documentation. *Note Additions: (gawk)Additions, for a discussion of the general procedure. In particular, pay attention to the coding style guidelines in *note Adding Code: (gawk)Adding Code.(1) These two @@ -1469,7 +1475,7 @@ respectively. ---------- Footnotes ---------- - (1) Changes that don't follow the coding style guidelines won't be + (1) Changes that don’t follow the coding style guidelines won’t be accepted. Period. @@ -1484,7 +1490,7 @@ Foundation. This is generally an easy thing to do. 
In particular, you can choose to use a version of the copyright assignment which assigns all your -current _and future_ changes to 'gawk' to the FSF. This means that you +current _and future_ changes to ‘gawk’ to the FSF. This means that you only need to do the paperwork once, and from then on all your changes will automatically belong to the FSF. The maintainer recommends doing this. @@ -1499,9 +1505,9 @@ File: gawkworkflow.info, Node: Tools, Next: Debugging, Prev: Doing paperwork, ================================ This minor node discusses additional tools that you may need to install -on your system in order to be in sync with what the 'gawk' maintainer +on your system in order to be in sync with what the ‘gawk’ maintainer uses. It also discusses different C compiler options for use during -code development, and how to compile 'gawk' for debugging. +code development, and how to compile ‘gawk’ for debugging. * Menu: @@ -1514,7 +1520,7 @@ File: gawkworkflow.info, Node: GNU Tools, Next: Compilers, Up: Tools 8.3.1 GNU Tools --------------- -If you expect to work with the configuration files and/or the 'Makefile' +If you expect to work with the configuration files and/or the ‘Makefile’ files, you will need to install a number of other GNU tools. In general, you should be using the latest versions of the tools, or at least the same ones that the maintainer himself uses. This helps minimize the @@ -1523,38 +1529,38 @@ in general avoids confusion and hassle. Similarly, you should install the latest GNU documentation tools as well. The tools are described in the following list: -'autoconf' - GNU Autoconf processes the 'configure.ac' files in order to - generate the 'configure' shell script and 'config.h.in' input file. +‘autoconf’ + GNU Autoconf processes the ‘configure.ac’ files in order to + generate the ‘configure’ shell script and ‘config.h.in’ input file. See the Autoconf home page (https://www.gnu.org/software/autoconf/autoconf.html) for more information.
-'automake' - GNU Automake processes the 'configure.ac' and 'Makefile.am' files - to produce 'Makefile.in' files. See the Automake home page +‘automake’ + GNU Automake processes the ‘configure.ac’ and ‘Makefile.am’ files + to produce ‘Makefile.in’ files. See the Automake home page (https://www.gnu.org/software/automake) for more information. -'gettext' - GNU Gettext processes the 'gawk' source code to produce the - original 'po/gawk.pot' message template file. Normally you should +‘gettext’ + GNU Gettext processes the ‘gawk’ source code to produce the + original ‘po/gawk.pot’ message template file. Normally you should not need to do this; the maintainer usually manages this task. See the Gettext home page (https://www.gnu.org/software/gettext) for more information. -'libtool' +‘libtool’ GNU Libtool works with Autoconf and Automake to produce portable shared libraries. It is used for the extensions that ship with - 'gawk', whose code is in the 'extensions' directory. See the + ‘gawk’, whose code is in the ‘extensions’ directory. See the Libtool home page (https://www.gnu.org/software/libtool) for more information. -'makeinfo' - The 'makeinfo' command is used to build the Info versions of the +‘makeinfo’ + The ‘makeinfo’ command is used to build the Info versions of the documentation. You need to have the same version as the maintainer uses, so that when you make a change to the documentation, the corresponding change to the generated Info file will be minimal. - 'makeinfo' is part of GNU Texinfo. See the Texinfo home page + ‘makeinfo’ is part of GNU Texinfo. See the Texinfo home page (https://www.gnu.org/software/texinfo) for more information. @@ -1563,54 +1569,54 @@ File: gawkworkflow.info, Node: Compilers, Prev: GNU Tools, Up: Tools 8.3.2 Compilers --------------- -The default compiler for 'gawk' development is GCC, the GNU Compiler +The default compiler for ‘gawk’ development is GCC, the GNU Compiler Collection (https://gcc.gnu.org).
The default version of GCC is -whatever is on the maintainer's personal GNU/Linux system, although he +whatever is on the maintainer’s personal GNU/Linux system, although he does try to build the latest released version if that is newer than -what's on his system, and then occasionally test 'gawk' with it. +what’s on his system, and then occasionally test ‘gawk’ with it. - He also attempts to test occasionally with 'clang' + He also attempts to test occasionally with ‘clang’ (https://clang.llvm.org/). However, he uses whatever is the default for his GNU/Linux system, and does _not_ make an effort to build the current version for testing. - Both GCC and 'clang' are highly optimizing compilers that produce + Both GCC and ‘clang’ are highly optimizing compilers that produce good code, but are very slow. There are two other compilers that are faster, but that may not produce quite as good code. However, they are both reasonable for doing development. -_The Tiny C Compiler, 'tcc'_ +_The Tiny C Compiler, ‘tcc’_ This compiler is _very_ fast, but it produces only mediocre code. - It is capable of compiling 'gawk', and it does so well enough that - 'make check' runs without errors. + It is capable of compiling ‘gawk’, and it does so well enough that + ‘make check’ runs without errors. However, in the past the quality has varied, and the maintainer has had problems with it. He recommends using it for regular development, where fast compiles are important, but rebuilding with - GCC before doing any commits, in case 'tcc' has missed + GCC before doing any commits, in case ‘tcc’ has missed something.(1) - See the project's home page (http://www.tinycc.org) for some - information. More information can be found in the project's Git + See the project’s home page (http://www.tinycc.org) for some + information. More information can be found in the project’s Git repository (http://repo.or.cz/tinycc.git). 
The maintainer builds - from the 'mob' branch for his work, but after updating it you - should check that this branch still works to compile 'gawk' before + from the ‘mob’ branch for his work, but after updating it you + should check that this branch still works to compile ‘gawk’ before installing it. _The (Revived) Portable C Compiler_ This is an updated version of the venerable Unix Portable C Compiler, PCC. It accepts ANSI C syntax and supports both older and - modern architectures. It produces better code than 'tcc' but is - slower, although still much faster than GCC and 'clang'. + modern architectures. It produces better code than ‘tcc’ but is + slower, although still much faster than GCC and ‘clang’. - See the project's home page (http://pcc.ludd.ltu.se) for more + See the project’s home page (http://pcc.ludd.ltu.se) for more information. See <http://pcc.ludd.ltu.se/supported-platforms> for instructions about obtaining the code using CVS and building it. - An alternative location for the source is the 'gawk' maintainer's + An alternative location for the source is the ‘gawk’ maintainer’s Git mirror (https://github.com/arnoldrobbins/pcc-revived) of the - code. If you're using Ubuntu GNU/Linux 18.04 or later, you need to - use the 'ubuntu-18' branch from this Git mirror. + code. If you’re using Ubuntu GNU/Linux 18.04 or later, you need to + use the ‘ubuntu-18’ branch from this Git mirror. ---------- Footnotes ---------- @@ -1623,14 +1629,14 @@ File: gawkworkflow.info, Node: Debugging, Prev: Tools, Up: Development Stuff =========================== If you wish to compile for debugging, you should use GCC. After running -'configure' but before running 'make', edit the 'Makefile' and remove -the '-O2' flag from the definition of 'CFLAGS'. Optionally, do the same -for 'support/Makefile' and/or 'extensions/Makefile'. Then run 'make'. +‘configure’ but before running ‘make’, edit the ‘Makefile’ and remove +the ‘-O2’ flag from the definition of ‘CFLAGS’. 
Optionally, do the same +for ‘support/Makefile’ and/or ‘extensions/Makefile’. Then run ‘make’. You can enable additional debugging code by creating a file named -'.developing' in the 'gawk' source code directory _before_ running -'configure'. Doing so enables additional conditionally-compiled -debugging code within 'gawk', and adds additional warning and debugging +‘.developing’ in the ‘gawk’ source code directory _before_ running +‘configure’. Doing so enables additional conditionally-compiled +debugging code within ‘gawk’, and adds additional warning and debugging options if compiling with GCC. It also disables optimization. @@ -1643,101 +1649,101 @@ This major node provides an alphabetical list of the Git commands cited in this Info file, along with brief descriptions of what the commands do. - Note that you may always use either 'git help COMMAND' or 'git -COMMAND --help' to get short, man-page style help on how to use any + Note that you may always use either ‘git help COMMAND’ or ‘git +COMMAND --help’ to get short, man-page style help on how to use any given Git command. -'git add' +‘git add’ Add a file to the list of files to be committed. -'git branch' +‘git branch’ View existing branches, or delete a branch. The most useful - options are '-a' and '-d'. + options are ‘-a’ and ‘-d’. -'git checkout' +‘git checkout’ Checkout an existing branch, create a new branch, or checkout a - file to reset it. Use the '-b' option to create and checkout a new + file to reset it. Use the ‘-b’ option to create and checkout a new branch in one operation. -'git clone' +‘git clone’ Clone (make a new copy of) an existing repository. You generally only need to do this once. -'git commit' +‘git commit’ Commit changes to files which have been staged for committing with - 'git add'. This makes your changes permanent, _in your local + ‘git add’. This makes your changes permanent, _in your local repository only_. To publish your changes to an upstream repo, you - must use 'git push'. 
+ must use ‘git push’. -'git config' +‘git config’ Display and/or change global and/or local configuration settings. -'git diff' - Show a unified-format diff of what's changed in the current +‘git diff’ + Show a unified-format diff of what’s changed in the current directory as of the last commit. It helps to have Git configured to use its builtin pager for reviewing diffs (*note Configuring git::). -'git difftool' - Use a "tool" (usually a GUI-based program) to view differences, - instead of the standard textual diff as you'd get from 'git diff'. +‘git difftool’ + Use a “tool” (usually a GUI-based program) to view differences, + instead of the standard textual diff as you’d get from ‘git diff’. -'git fetch' - Update your local copy of the upstream's branches. That is, update - the various 'origin/' branches. This leaves your local tracking - branches unchanged. With the '--prune' option, this removes any - copies of stale 'origin/' branches. +‘git fetch’ + Update your local copy of the upstream’s branches. That is, update + the various ‘origin/’ branches. This leaves your local tracking + branches unchanged. With the ‘--prune’ option, this removes any + copies of stale ‘origin/’ branches. -'git format-patch' +‘git format-patch’ Create a series of patch files, one per commit not on the original branch from which you started. -'git gc' - Run a "garbage collection" pass in the current repository. This - can often reduce the space used in a large repo. For 'gawk' it +‘git gc’ + Run a “garbage collection” pass in the current repository. This + can often reduce the space used in a large repo. For ‘gawk’ it does not make that much difference. -'git help' - Print a man-page-style usage summary for a command. +‘git help’ + Print a man-page–style usage summary for a command. -'git log' - Show the current branch's commit log. This includes who made the +‘git log’ + Show the current branch’s commit log. This includes who made the commit, the date, and the commit message. 
Commits are shown from newest to oldest. -'git merge' +‘git merge’ Merge changes from the named branch into the current one. -'git pull' - When in your local tracking branch 'XXX', run 'git fetch', and then - merge from 'origin/XXX' into 'XXX'. +‘git pull’ + When in your local tracking branch ‘XXX’, run ‘git fetch’, and then + merge from ‘origin/XXX’ into ‘XXX’. -'git push' - Push commits from your local tracking branch 'XXX' through - 'origin/XXX' and on to branch 'XXX' in the upstream repo. Use 'git - push -u origin --delete XXX' to delete an upstream branch. (Do so +‘git push’ + Push commits from your local tracking branch ‘XXX’ through + ‘origin/XXX’ and on to branch ‘XXX’ in the upstream repo. Use ‘git + push -u origin --delete XXX’ to delete an upstream branch. (Do so carefully!) -'git rebase' +‘git rebase’ Rebase the changes in the current purely local branch to look as if they had been made relative to the latest commit in the current - upstream branch (typically 'master'). This is how you keep your + upstream branch (typically ‘master’). This is how you keep your local, in-progress changes up-to-date with respect to the original branch from which they were started. -'git reset' +‘git reset’ Restore the original state of the repo, especially with the - '--hard' option. Read up on this command, and use it carefully. + ‘--hard’ option. Read up on this command, and use it carefully. -'git stash' +‘git stash’ Save your current changes in a special place within Git. They can - be restored with 'git stash pop', even on a different branch. Use - 'git stash list' to see the list of stashed changes. + be restored with ‘git stash pop’, even on a different branch. Use + ‘git stash list’ to see the list of stashed changes. -'git status' +‘git status’ Show the status of files that are scheduled to be committed, and those that have been modified but not yet scheduled for committing. - Use 'git add' to schedule a file for committing. 
This command also + Use ‘git add’ to schedule a file for committing. This command also lists untracked files. @@ -1747,13 +1753,13 @@ Appendix B Git Resources ************************ There are many Git resources available on the Internet. Start at the -Git Project home page (http://git-scm.org). In particular, the 'Pro -Git' book (https://git-scm.com/book/en/v2) is available online. +Git Project home page (http://git-scm.org). In particular, the ‘Pro +Git’ book (https://git-scm.com/book/en/v2) is available online. See also the Savannah quick introduction to Git (http://savannah.gnu.org/maintenance/UsingGit). - A nice article on how Git works is 'Git From The Bottom Up' + A nice article on how Git works is ‘Git From The Bottom Up’ (http://jwiegley.github.io/git-from-the-bottom-up/), by John Wiegley. @@ -1762,7 +1768,7 @@ File: gawkworkflow.info, Node: TODO, Next: Index, Prev: Resources, Up: Top Appendix C Stuff Still To Do In This Document ********************************************* - * Fill out all examples with full output + • Fill out all examples with full output File: gawkworkflow.info, Node: Index, Prev: TODO, Up: Top @@ -1828,7 +1834,7 @@ Index * gawktexi.in documentation: Developing patches. (line 13) * GCC, the GNU Compiler Collection: Compilers. (line 6) * generating a single patch: Submitting Changes. (line 14) -* generating multiple patches: Submitting Changes. (line 24) +* generating multiple patches: Submitting Changes. (line 29) * gettext: GNU Tools. (line 27) * git branch command, -a option: Remotes. (line 6) * git command, git branch: Repo Copies. (line 39) @@ -1869,8 +1875,8 @@ Index * git command, git pull <2>: Rebasing. (line 10) * git command, git checkout <5>: Submitting Changes. (line 18) * git command, git diff <1>: Submitting Changes. (line 18) -* git command, git format-patch: Submitting Changes. (line 24) -* git command, git checkout <6>: Submitting Changes. (line 27) +* git command, git format-patch: Submitting Changes. 
(line 29) +* git command, git checkout <6>: Submitting Changes. (line 32) * git command, git checkout <7>: Removing Branches. (line 9) * git command, git pull <3>: Removing Branches. (line 9) * git command, git branch <1>: Removing Branches. (line 9) @@ -1884,10 +1890,10 @@ Index * git command, git merge: Points to remember. (line 37) * git command, git clone <2>: ssh clone. (line 10) * git command, git diff <3>: Developing patches. (line 16) -* git command, git diff <4>: Developing patches. (line 26) -* git command, git add <1>: Developing patches. (line 26) -* git command, git commit <1>: Developing patches. (line 26) -* git command, git push <1>: Developing patches. (line 26) +* git command, git diff <4>: Developing patches. (line 27) +* git command, git add <1>: Developing patches. (line 27) +* git command, git commit <1>: Developing patches. (line 27) +* git command, git push <1>: Developing patches. (line 27) * git command, git checkout <11>: Developing new features. (line 9) * git command, git pull <6>: Developing new features. @@ -1961,7 +1967,7 @@ Index * ownership of directories: General practices. (line 17) * pager.status configuration setting: Configuring git. (line 24) * patch, single, generation of: Submitting Changes. (line 14) -* patches, multiple, generation of: Submitting Changes. (line 24) +* patches, multiple, generation of: Submitting Changes. (line 29) * pcc compiler: Compilers. (line 40) * pcc compiler, Git mirror: Compilers. (line 50) * Portable C compiler: Compilers. 
(line 40) @@ -1995,69 +2001,69 @@ Index Tag Table: -Node: Top1127 -Node: Preface5223 -Node: Intended Audience5788 -Node: This Manual6662 -Node: Conventions8183 -Node: Acknowledgments9652 -Node: Reviewers10089 -Node: Contributing10410 -Node: Using Git13782 -Node: Push Pull14538 -Node: Repo Copies16083 -Ref: savannah-repo17066 -Ref: your-repo18100 -Ref: Repo Copies-Footnote-119795 -Node: Local Branches19852 -Ref: your-repo-221625 -Ref: Local Branches-Footnote-122707 -Node: Branches are state22765 -Node: Repo State23488 -Node: Local State25608 -Node: Remotes26272 -Node: Configuring git27586 -Ref: Configuring git-Footnote-129945 -Node: Development without commit access30114 -Node: Cloning31170 -Ref: Cloning-Footnote-132707 -Node: Switching Branches32774 -Node: Starting A New Branch33392 -Ref: Starting A New Branch-Footnote-135299 -Ref: Starting A New Branch-Footnote-235357 -Node: Undoing a change35433 -Node: Saving Without Committing36053 -Node: Updating37611 -Node: Rebasing38122 -Node: Merge Conflicts38734 -Node: Submitting Changes40944 -Ref: Submitting Changes-Footnote-143088 -Ref: Submitting Changes-Footnote-243125 -Ref: Submitting Changes-Footnote-343161 -Node: Removing Branches43207 -Node: Points to remember43743 -Node: Development with commit access45420 -Node: Initial setup46065 -Node: ssh clone46853 -Node: Developing patches47426 -Node: Developing new features48759 -Node: Developing fixes50659 -Node: General practices51742 -Node: Repo Maintenance56441 -Ref: Repo Maintenance-Footnote-159313 -Node: Development Stuff59449 -Node: Coding style60012 -Ref: Coding style-Footnote-160670 -Node: Doing paperwork60760 -Node: Tools61555 -Node: GNU Tools62137 -Node: Compilers64298 -Ref: Compilers-Footnote-166908 -Node: Debugging66946 -Node: Cheat Sheet67709 -Node: Resources71618 -Node: TODO72195 -Node: Index72415 +Node: Top1168 +Node: Preface5315 +Node: Intended Audience5886 +Node: This Manual6768 +Node: Conventions8329 +Node: Acknowledgments9850 +Node: Reviewers10287 +Node: 
Contributing10608 +Node: Using Git14062 +Node: Push Pull14822 +Node: Repo Copies16388 +Ref: savannah-repo17403 +Ref: your-repo18460 +Ref: Repo Copies-Footnote-120179 +Node: Local Branches20236 +Ref: your-repo-222057 +Ref: Local Branches-Footnote-123143 +Node: Branches are state23201 +Node: Repo State23936 +Node: Local State26120 +Node: Remotes26788 +Node: Configuring git28145 +Ref: Configuring git-Footnote-130555 +Node: Development without commit access30726 +Node: Cloning31786 +Ref: Cloning-Footnote-133351 +Node: Switching Branches33418 +Node: Starting A New Branch34052 +Ref: Starting A New Branch-Footnote-136036 +Ref: Starting A New Branch-Footnote-236096 +Node: Undoing a change36178 +Node: Saving Without Committing36802 +Node: Updating38386 +Node: Rebasing38901 +Node: Merge Conflicts39523 +Node: Submitting Changes41800 +Ref: Submitting Changes-Footnote-144271 +Ref: Submitting Changes-Footnote-244310 +Ref: Submitting Changes-Footnote-344346 +Node: Removing Branches44394 +Node: Points to remember44930 +Node: Development with commit access46639 +Node: Initial setup47292 +Node: ssh clone48096 +Node: Developing patches48686 +Node: Developing new features50110 +Node: Developing fixes52022 +Node: General practices53113 +Node: Repo Maintenance57946 +Ref: Repo Maintenance-Footnote-160857 +Node: Development Stuff60993 +Node: Coding style61562 +Ref: Coding style-Footnote-162224 +Node: Doing paperwork62318 +Node: Tools63117 +Node: GNU Tools63707 +Node: Compilers65940 +Ref: Compilers-Footnote-168620 +Node: Debugging68658 +Node: Cheat Sheet69469 +Node: Resources73576 +Node: TODO74161 +Node: Index74383 End Tag Table diff --git a/doc/gawkworkflow.texi b/doc/gawkworkflow.texi index 71f022ab..2afbb664 100644 --- a/doc/gawkworkflow.texi +++ b/doc/gawkworkflow.texi @@ -28,10 +28,10 @@ @c applies to and all the info about who's publishing this edition @c These apply across the board. 
-@set UPDATE-MONTH July, 2022 +@set UPDATE-MONTH March, 2023 @set TITLE Participating in @command{gawk} Development -@set EDITION 0.74 +@set EDITION 0.75 @iftex @set DOCUMENT booklet @@ -144,13 +144,13 @@ Fax: +1-617-542-2652 Email: <email>gnu@@gnu.org</email> URL: <ulink url="http://www.gnu.org">http://www.gnu.org/</ulink></literallayout> -<literallayout class="normal">Copyright © 2017, 2018, 2019, 2020, 2022 +<literallayout class="normal">Copyright © 2017, 2018, 2019, 2020, 2022, 2023 Free Software Foundation, Inc. All Rights Reserved.</literallayout> @end docbook @ifnotdocbook -Copyright @copyright{} 2017, 2018, 2019, 2020, 2022 +Copyright @copyright{} 2017, 2018, 2019, 2020, 2022, 2023 Free Software Foundation, Inc. @end ifnotdocbook @sp 2 @@ -695,7 +695,7 @@ branches: @table @dfn @item Tracking Branches @cindex tracking branches -@cindex branches @subentry tracking +@cindex branches @subentry tracking @cindex @command{git} command @subentry @code{git checkout} Tracking branches track branches from the upstream repository. You first create a tracking branch simply by checking out a branch from the @@ -1253,7 +1253,7 @@ $ @kbd{git rebase master} @ii{``Start over'' from current} mast @print{} error: Failed to merge in the changes. @print{} Patch failed at 0001 Demo change. @print{} Use 'git am --show-current-patch' to see the failed patch -@print{} +@print{} @print{} Resolve all conflicts manually, mark them as resolved with @print{} "git add/rm <conflicted_files>", then run "git rebase --continue". @print{} You can instead skip this commit: run "git rebase --skip". @@ -1301,6 +1301,12 @@ $ @kbd{git diff master > /tmp/python.diff} Mail the @file{python.diff} file to the appropriate mailing list along with a description of what you've changed and why. +The patch file will likely contain changes to generated files, +such as @file{awkgram.c} or @file{Makefile.in}. If you are +comfortable manually editing the patch file to remove those +changes, do so. 
If not, then send the file as-is and the +maintainer will handle it. + @cindex @command{git} command @subentry @code{git format-patch} @cindex generating multiple patches @cindex patches, multiple, generation of @@ -1493,6 +1499,7 @@ and/or @file{doc/gawk.1}. @cindex @command{git} command @subentry @code{git diff} @item Use @samp{git diff > mychange.diff} to create a patch file. +(If you want, remove the diffs for generated files.) @item Send it to the mailing list for discussion. @@ -1644,7 +1651,7 @@ Unless you can convince the maintainer (and the other developers!) otherwise, you should @emph{always} start branches for new features from @code{master}, and not from the current ``stable'' branch. -Use @samp{checkout -b feature/@var{feature_name}} to create the initial branch. +Use @samp{git checkout -b feature/@var{feature_name}} to create the initial branch. You may then elect to keep it purely local, or to push it up to Savannah for review, even if the feature is not yet totally ``ready for prime time.'' @end table @@ -1877,7 +1884,7 @@ documentation. @ifnothtml @xref{Additions, Additions, Making Additions to @command{gawk}, gawk, GAWK: Effective awk Programming}, for a discussion of the general procedure. In particular, pay attention to the -coding style guidelines in +coding style guidelines in @ref{Adding Code, Adding Code, Adding New Features, gawk, GAWK: Effective awk Programming}.@footnote{Changes that don't follow the coding style guidelines won't be accepted. Period.} These two sections may also be found online, at @@ -1889,7 +1896,7 @@ respectively. See @uref{https://www.gnu.org/software/gawk/manual/html_node/Additions.html#Additions, the section @cite{Making Additions to @command{gawk}}}, in the online documentation for a discussion of the general procedure. 
In particular, pay attention to the -coding style guidelines in +coding style guidelines in @uref{https://www.gnu.org/software/gawk/manual/html_node/Adding-Code.html#Adding-Code, the section @cite{Adding New Features}}, also in the online documentation. @end ifhtml @@ -2052,7 +2059,7 @@ for instructions about obtaining the code using CVS and building it. @cindex @command{pcc} compiler @subentry Git mirror An alternative location for the source is the @command{gawk} maintainer's @uref{https://github.com/arnoldrobbins/pcc-revived, -Git mirror} of the code. If you're using Ubuntu GNU/Linux 18.04 +Git mirror} of the code. If you're using Ubuntu GNU/Linux 18.04 or later, you need to use the @code{ubuntu-18} branch from this Git mirror. @end table diff --git a/doc/it/ChangeLog b/doc/it/ChangeLog index 1dce5629..e679b899 100755 --- a/doc/it/ChangeLog +++ b/doc/it/ChangeLog @@ -1,3 +1,28 @@ +2023-03-07 Antonio Giovanni Colombo <azc100@gmail.com> + + * gawktexi.in: Updated. + +2023-02-26 Antonio Giovanni Colombo <azc100@gmail.com> + + * gawktexi.in: Updated. + +2023-02-24 Antonio Giovanni Colombo <azc100@gmail.com> + + * gawktexi.in: Updated. + +2023-02-15 Arnold D. Robbins <arnold@skeeve.com> + + * gawktexi.in (EDITION): Bump to 5.3. Thanks to Antonio + Colombo for the suggestion. + +2023-02-13 Antonio Giovanni Colombo <azc100@gmail.com> + + * gawktexi.in: Updated. + +2023-02-10 Antonio Giovanni Colombo <azc100@gmail.com> + + * gawktexi.in: Updated. + 2023-02-07 Antonio Giovanni Colombo <azc100@gmail.com> * gawktexi.in: Updated.
diff --git a/doc/it/api-figura1.eps b/doc/it/gawk-api-figura1.eps index 93560797..93560797 100644 --- a/doc/it/api-figura1.eps +++ b/doc/it/gawk-api-figura1.eps diff --git a/doc/it/api-figura1.fig b/doc/it/gawk-api-figura1.fig index c2718c71..c2718c71 100644 --- a/doc/it/api-figura1.fig +++ b/doc/it/gawk-api-figura1.fig diff --git a/doc/it/api-figura1.pdf b/doc/it/gawk-api-figura1.pdf Binary files differindex f31e25a8..f31e25a8 100644 --- a/doc/it/api-figura1.pdf +++ b/doc/it/gawk-api-figura1.pdf diff --git a/doc/it/api-figura1.png b/doc/it/gawk-api-figura1.png Binary files differindex 444c2976..444c2976 100644 --- a/doc/it/api-figura1.png +++ b/doc/it/gawk-api-figura1.png diff --git a/doc/it/api-figura1.txt b/doc/it/gawk-api-figura1.txt index 630e18f0..630e18f0 100644 --- a/doc/it/api-figura1.txt +++ b/doc/it/gawk-api-figura1.txt diff --git a/doc/it/api-figura2.eps b/doc/it/gawk-api-figura2.eps index 9920d3b9..9920d3b9 100644 --- a/doc/it/api-figura2.eps +++ b/doc/it/gawk-api-figura2.eps diff --git a/doc/it/api-figura2.fig b/doc/it/gawk-api-figura2.fig index a8b5c47d..a8b5c47d 100644 --- a/doc/it/api-figura2.fig +++ b/doc/it/gawk-api-figura2.fig diff --git a/doc/it/api-figura2.pdf b/doc/it/gawk-api-figura2.pdf Binary files differindex cadd4267..cadd4267 100644 --- a/doc/it/api-figura2.pdf +++ b/doc/it/gawk-api-figura2.pdf diff --git a/doc/it/api-figura2.png b/doc/it/gawk-api-figura2.png Binary files differindex dbc46910..dbc46910 100644 --- a/doc/it/api-figura2.png +++ b/doc/it/gawk-api-figura2.png diff --git a/doc/it/api-figura2.txt b/doc/it/gawk-api-figura2.txt index a030fb5a..a030fb5a 100644 --- a/doc/it/api-figura2.txt +++ b/doc/it/gawk-api-figura2.txt diff --git a/doc/it/api-figura3.eps b/doc/it/gawk-api-figura3.eps index daa3ba76..daa3ba76 100644 --- a/doc/it/api-figura3.eps +++ b/doc/it/gawk-api-figura3.eps diff --git a/doc/it/api-figura3.fig b/doc/it/gawk-api-figura3.fig index fae92940..fae92940 100644 --- a/doc/it/api-figura3.fig +++ 
b/doc/it/gawk-api-figura3.fig diff --git a/doc/it/api-figura3.pdf b/doc/it/gawk-api-figura3.pdf Binary files differindex 07f406bd..07f406bd 100644 --- a/doc/it/api-figura3.pdf +++ b/doc/it/gawk-api-figura3.pdf diff --git a/doc/it/api-figura3.png b/doc/it/gawk-api-figura3.png Binary files differindex 26ca6cd6..26ca6cd6 100644 --- a/doc/it/api-figura3.png +++ b/doc/it/gawk-api-figura3.png diff --git a/doc/it/api-figura3.txt b/doc/it/gawk-api-figura3.txt index 02791df5..02791df5 100644 --- a/doc/it/api-figura3.txt +++ b/doc/it/gawk-api-figura3.txt diff --git a/doc/it/flusso-elaborazione.eps b/doc/it/gawk-flusso-elaborazione.eps index c9e4c938..c9e4c938 100644 --- a/doc/it/flusso-elaborazione.eps +++ b/doc/it/gawk-flusso-elaborazione.eps diff --git a/doc/it/flusso-elaborazione.fig b/doc/it/gawk-flusso-elaborazione.fig index 50c9a209..50c9a209 100644 --- a/doc/it/flusso-elaborazione.fig +++ b/doc/it/gawk-flusso-elaborazione.fig diff --git a/doc/it/flusso-elaborazione.pdf b/doc/it/gawk-flusso-elaborazione.pdf Binary files differindex e7fb8555..e7fb8555 100644 --- a/doc/it/flusso-elaborazione.pdf +++ b/doc/it/gawk-flusso-elaborazione.pdf diff --git a/doc/it/flusso-elaborazione.png b/doc/it/gawk-flusso-elaborazione.png Binary files differindex 4dc95902..4dc95902 100644 --- a/doc/it/flusso-elaborazione.png +++ b/doc/it/gawk-flusso-elaborazione.png diff --git a/doc/it/flusso-elaborazione.txt b/doc/it/gawk-flusso-elaborazione.txt index 87a5b439..87a5b439 100644 --- a/doc/it/flusso-elaborazione.txt +++ b/doc/it/gawk-flusso-elaborazione.txt diff --git a/doc/it/programma-generico.eps b/doc/it/gawk-programma-generico.eps index db87944d..db87944d 100644 --- a/doc/it/programma-generico.eps +++ b/doc/it/gawk-programma-generico.eps diff --git a/doc/it/programma-generico.fig b/doc/it/gawk-programma-generico.fig index e87f6e3b..e87f6e3b 100644 --- a/doc/it/programma-generico.fig +++ b/doc/it/gawk-programma-generico.fig diff --git a/doc/it/programma-generico.pdf 
b/doc/it/gawk-programma-generico.pdf Binary files differindex d5c751af..d5c751af 100644 --- a/doc/it/programma-generico.pdf +++ b/doc/it/gawk-programma-generico.pdf diff --git a/doc/it/programma-generico.png b/doc/it/gawk-programma-generico.png Binary files differindex 1a877907..1a877907 100644 --- a/doc/it/programma-generico.png +++ b/doc/it/gawk-programma-generico.png diff --git a/doc/it/programma-generico.txt b/doc/it/gawk-programma-generico.txt index 1f6e5124..1f6e5124 100644 --- a/doc/it/programma-generico.txt +++ b/doc/it/gawk-programma-generico.txt diff --git a/doc/it/vettore-elementi.eps b/doc/it/gawk-vettore-elementi.eps index 87979fb8..87979fb8 100644 --- a/doc/it/vettore-elementi.eps +++ b/doc/it/gawk-vettore-elementi.eps diff --git a/doc/it/vettore-elementi.fig b/doc/it/gawk-vettore-elementi.fig index 37f3449c..37f3449c 100644 --- a/doc/it/vettore-elementi.fig +++ b/doc/it/gawk-vettore-elementi.fig diff --git a/doc/it/vettore-elementi.pdf b/doc/it/gawk-vettore-elementi.pdf Binary files differindex cfec8760..cfec8760 100644 --- a/doc/it/vettore-elementi.pdf +++ b/doc/it/gawk-vettore-elementi.pdf diff --git a/doc/it/vettore-elementi.png b/doc/it/gawk-vettore-elementi.png Binary files differindex 87ef434d..87ef434d 100644 --- a/doc/it/vettore-elementi.png +++ b/doc/it/gawk-vettore-elementi.png diff --git a/doc/it/vettore-elementi.txt b/doc/it/gawk-vettore-elementi.txt index 5d7efb83..5d7efb83 100644 --- a/doc/it/vettore-elementi.txt +++ b/doc/it/gawk-vettore-elementi.txt diff --git a/doc/it/gawkbug.1 b/doc/it/gawkbug.1 new file mode 100755 index 00000000..484777a8 --- /dev/null +++ b/doc/it/gawkbug.1 @@ -0,0 +1,84 @@ +.\" +.\" MAN PAGE COMMENTS to +.\" +.\" Arnold Robbins +.\" bug-gawk@gnu.org +.\" +.\" Last Change: Mon Apr 18 16:21:25 IDT 2022 +.\" +.\" Traduzione di Antonio Giovanni Colombo <azc100@gmail.com> +.\" per la versione gawk-5.2 +.TH GAWKBUG 1 "2022 Apr 18" "GNU Awk 5.2" +.SH NOME +gawkbug \- segnala un bug di gawk +.SH SINTASSI +\fBgawkbug\fP 
[\fI--version\fP] [\fI--help\fP] [\fIindirizzo-email\fP] +.SH DESCRIZIONE +.B gawkbug +è uno script di shell che aiuta un utente a comporre e spedire delle +segnalazioni di bug riguardo a +.B gawk +in un formato standard. +.B gawkbug +chiama il programma di edit specificato nella variabile +.SM +.B EDITOR +per modificare una copia temporanea di un modulo standard per +la segnalazione di errori. L'utente deve riempire i campi appropriati +e quindi uscire dalla sessione di edit. +In seguito, +.B gawkbug +spedisce la segnalazione così preparata a \fIbug-gawk@gnu.org\fP, o +all'\fIindirizzo-email\fP specificato. Se l'invio non riesce, il +modulo compilato viene salvato, con il nome \fIdead.gawkbug\fP, +nella home directory dell'utente che sta facendo la segnalazione. +.PP +Il modulo di segnalazione bug è composto da più sezioni. +La prima sezione fornisce informazioni riguardo al computer, al +sistema operativo, alla versione di +.B gawk +e all'ambiente di compilazione. +La seconda sezione va riempita con la descrizione del bug. +La terza sezione dovrebbe contenere una descrizione di come è +possibile riprodurre il bug. +La quarta sezione (opzionale) permette di segnalare una possibile +correzione. La segnalazione di correzioni è molto gradita. +.SH VARIABILI D'AMBIENTE +.B gawkbug +utilizzerà le seguenti variabili d'ambiente, se definite: +.TP +.B EDITOR +Specifica il programma di edit preferito. Se +.SM +.B EDITOR +non è impostato, +.B gawkbug +tenta di trovare alcuni programmi di edit alternativi, compresi +.BR vim +e, se necessario, +.BR emacs . +Se +.B gawkbug +non riesce a trovare alcun programma di edit alternativo, tenta di eseguire \fBvi\fP. +.TP +.B HOME +Nome della directory in cui viene salvata una segnalazione di bug, se non +è stato possibile inviarla con la posta elettronica. +.TP +.B TMPDIR +Nome della directory in cui creare file e directory temporanei. 
+.SH VEDERE ANCHE +.TP +\fIgawk\fP(1) +.SH AUTORI +Brian Fox, Free Software Foundation +.br +bfox@gnu.org +.PP +Chet Ramey, Case Western Reserve University +.br +chet@po.cwru.edu +.PP +Arnold Robbins +.br +bug-gawk@gnu.org diff --git a/doc/it/gawktexi.in b/doc/it/gawktexi.in index 2b172f7d..68216999 100755 --- a/doc/it/gawktexi.in +++ b/doc/it/gawktexi.in @@ -56,9 +56,9 @@ @c applies to and all the info about who's publishing this edition @c These apply across the board. -@set UPDATE-MONTH Dicembre 2022 -@set VERSION 5.2 -@set PATCHLEVEL 2 +@set UPDATE-MONTH Febbraio 2023 +@set VERSION 5.3 +@set PATCHLEVEL 0 @c added Italian hyphenation stuff @hyphenation{ven-go-no o-met-te-re o-met-ten-do} @@ -73,7 +73,7 @@ @set TITLE GAWK: Programmare efficacemente in AWK @end ifclear @set SUBTITLE Una Guida Utente per GNU Awk -@set EDITION 5.2 +@set EDITION 5.3 @iftex @set DOCUMENT libro @@ -772,6 +772,7 @@ Copyright dell'edizione italiana @copyright{} 2016 -- Free Software Foundation, @code{close()}. * Continuazione dopo errori:: Abilitare continuazione dopo errori in output. +* Noflush:: Velocizzare output da @dfn{pipe}. * Sommario di Output:: Sommario di Output. * Esercizi su Output:: Esercizi. * Valori:: Costanti, variabili ed espressioni @@ -5625,11 +5626,16 @@ directory possono essere necessarie per organizzare i file da includere. Vista la possibilit@`a di specificare opzioni @option{-f} multiple, il meccanismo @code{@@include} non @`e strettamente necessario. Comunque, la direttiva @code{@@include} pu@`o essere d'aiuto nel costruire -programmi @command{gawk} autosufficienti, riducendo cos@`{@dotless{i}} la necessit@`a -di scrivere righe di comando complesse e tediose. +programmi @command{gawk} autosufficienti, riducendo cos@`{@dotless{i}} la +necessit@`a di scrivere righe di comando complesse e tediose. In particolare, @code{@@include} @`e molto utile per scrivere @dfn{script} CGI eseguibili da pagine web. 
+La direttiva @code{@@include} e l'opzione @option{-i}/@option{--include} +sulla riga di comando sono completamente equivalenti. Un programma sorgente +incluso non viene caricato di nuovo se @`e stato gi@`a caricato +in precedenza. + Le regole usate per trovare un file sorgente, descritte @iftex nella @@ -10521,6 +10527,7 @@ e parla della funzione predefinita @code{close()}. file gi@`a aperti a inizio esecuzione * Chiusura file e @dfn{pipe}:: Chiudere file in input e di output e @dfn{pipe}. +* Noflush:: Velocizzare output da @dfn{pipe}. * Continuazione dopo errori:: Abilitare continuazione dopo errori in output. * Sommario di Output:: Sommario di Output. @@ -12138,8 +12145,63 @@ portabile. In modalit@`a POSIX (@pxref{Opzioni}), @command{gawk} restituisce solo zero quando chiude una @dfn{pipe}. +@node Noflush +@section Velocizzare output da @dfn{pipe} +@c FIXME: Add indexing + +Questa +@end ifnotinfo +@ifinfo +Questo +@end ifinfo +@value{SECTION} descrive una funzionalit@`a propria di @command{gawk}. + +Normalmente, quando si spediscono dati tramite una @dfn{pipe} a +un comando, usando le istruzioni @code{print} o @code{printf}, +@command{gawk} scarica l'output verso la @dfn{pipe}. +Ovvero, l'output non @`e bufferizzato, ma scritto direttamente. +Ci@`o garantisce che l'output della @dfn{pipe}, insieme a quello +generato da @command{gawk} viene scritto nell'ordine che ci si +aspetta: + +@example +print "qualcosa" # va allo standard output +print "qualcos'altro" | "un-comando" # anche allo standard output +print "ulteriori cose" # come pure questo +@end example + +Fare ci@`o ha un prezzo; scaricare dati nella @dfn{pipe} usa +pi@`u tempo CPU, e in alcuni ambienti tale consumo pu@`o +essere eccessivo. + +Si pu@`o chiedere a @command{gawk} di non scaricare direttamente dati +ma di bufferizzarli, in uno dei seguenti due modi: + +@itemize @bullet +@item +Impostare @code{PROCINFO["BUFFERPIPE"]} a un valore qualsiasi. 
+Dopo aver fatto questo, @command{gawk} bufferizzer@`a i dati per tutte +le @dfn{pipe}. + +@item +Impostare @code{PROCINFO["@var{un-comando}", "BUFFERPIPE"]} a un +valore qualsiasi. In tal caso, solo i dati relativi al comando +@var{un-comando} saranno bufferizzati. +@end itemize + +Uno degli elementi visti sopra @emph{deve} essere impostato nel +vettore @code{PROCINFO} @emph{prima} di eseguire la prima istruzione +@code{print} o @code{printf} diretta alla @dfn{pipe}. +Se lo si fa dopo che dell'output @`e gi@`a stato inviato alla @dfn{pipe}, +@`e troppo tardi. + +Utilizzare questa funzionalit@`a pu@`o modificare il comportamento +dell'output [cambiando l'ordine di quel che viene stampato], +quindi occorre stare attenti a quello che si fa. + @node Continuazione dopo errori @section Abilitare continuazione dopo errori in output +@c FIXME: Add indexing @ifnotinfo Questa @@ -17675,6 +17737,14 @@ I seguenti elementi consentono di modificare il comportamento di @command{gawk}: @table @code +@item PROCINFO["BUFFERPIPE"] +Se questo elemento esiste, tutto l'output alla @dfn{pipe} viene +bufferizzato. + +@item PROCINFO["@var{un-comando}", "BUFFERPIPE"] +Rende bufferizzato l'output del comando @var{un-comando}. +@xref{Noflush}. + @item PROCINFO["NONFATAL"] Se questo elemento esiste, gli errori di I/O per tutte le ridirezioni consentono la prosecuzione del programma. 
@@ -18211,10 +18281,10 @@ concettualmente, se i valori degli elementi sono 8, @code{"pippo"}, @float Figura,vettore-elementi @caption{Un vettore contiguo} @ifset SMALLPRINT -@center @image{vettore-elementi, 11cm, , Un vettore contiguo} +@center @image{gawk-vettore-elementi, 11cm, , Un vettore contiguo} @end ifset @ifclear SMALLPRINT -@center @image{vettore-elementi, , , Un vettore contiguo} +@center @image{gawk-vettore-elementi, , , Un vettore contiguo} @end ifclear @end float @end ifnotdocbook @@ -18223,7 +18293,7 @@ concettualmente, se i valori degli elementi sono 8, @code{"pippo"}, <figure id="vettore-elementi" float="0"> <title>Un vettore contiguo</title> <mediaobject> -<imageobject role="web"><imagedata fileref="vettore-elementi.png" format="PNG"/></imageobject> +<imageobject role="web"><imagedata fileref="gawk-vettore-elementi.png" format="PNG"/></imageobject> </mediaobject> </figure> @end docbook @@ -38664,10 +38734,10 @@ Questo si pu@`o vedere in @inlineraw{docbook, <xref linkend="figura-carica-esten @float Figura,figura-carica-estensione @caption{Caricamento dell'estensione} @ifclear SMALLPRINT -@center @image{api-figura1, , , Caricamento dell'estensione} +@center @image{gawk-api-figura1, , , Caricamento dell'estensione} @end ifclear @ifset SMALLPRINT -@center @image{api-figura1, 11cm, , Caricamento dell'estensione} +@center @image{gawk-api-figura1, 11cm, , Caricamento dell'estensione} @end ifset @end float @@ -38677,7 +38747,7 @@ Questo si pu@`o vedere in @inlineraw{docbook, <xref linkend="figura-carica-esten <figure id="figura-carica-estensione" float="0"> <title>Caricamento dell'estensione</title> <mediaobject> -<imageobject role="web"><imagedata fileref="api-figura1.png" format="PNG"/></imageobject> +<imageobject role="web"><imagedata fileref="gawk-api-figura1.png" format="PNG"/></imageobject> </mediaobject> </figure> @end docbook @@ -38705,10 +38775,10 @@ Questo @`e shown in @inlineraw{docbook, <xref linkend="figura-registrare-una-nuo @float 
Figura,figura-registrare-una-nuova-funzione @caption{Registrare una nuova funzione} @ifclear SMALLPRINT -@center @image{api-figura2, , , Registrare una nuova funzione} +@center @image{gawk-api-figura2, , , Registrare una nuova funzione} @end ifclear @ifset SMALLPRINT -@center @image{api-figura2, 11cm , , Registrare una nuova funzione} +@center @image{gawk-api-figura2, 11cm , , Registrare una nuova funzione} @end ifset @end float @end ifnotdocbook @@ -38717,7 +38787,7 @@ Questo @`e shown in @inlineraw{docbook, <xref linkend="figura-registrare-una-nuo <figure id="figura-registrare-una-nuova-funzione" float="0"> <title>Registering a new function</title> <mediaobject> -<imageobject role="web"><imagedata fileref="api-figura2.png" format="PNG"/></imageobject> +<imageobject role="web"><imagedata fileref="gawk-api-figura2.png" format="PNG"/></imageobject> </mediaobject> </figure> @end docbook @@ -38745,10 +38815,10 @@ Questo @`e mostrato in @inlineraw{docbook, <xref linkend="figura-chiamata-nuova- @float Figura,figura-chiamata-nuova-funzione @caption{Chiamata della nuova funzione} @ifclear SMALLPRINT -@center @image{api-figura3, , , Chiamata della nuova funzione} +@center @image{gawk-api-figura3, , , Chiamata della nuova funzione} @end ifclear @ifset SMALLPRINT -@center @image{api-figura3,11cm , , Chiamata della nuova funzione} +@center @image{gawk-api-figura3,11cm , , Chiamata della nuova funzione} @end ifset @end float @end ifnotdocbook @@ -38757,7 +38827,7 @@ Questo @`e mostrato in @inlineraw{docbook, <xref linkend="figura-chiamata-nuova- <figure id="figura-chiamata-nuova-funzione" float="0"> <title>Chiamata della nuova funzione</title> <mediaobject> -<imageobject role="web"><imagedata fileref="api-figura3.png" format="PNG"/></imageobject> +<imageobject role="web"><imagedata fileref="gawk-api-figura3.png" format="PNG"/></imageobject> </mediaobject> </figure> @end docbook @@ -39847,9 +39917,9 @@ registrate, quando viene invocato specificando l'opzione @option{--version}. 
@cindex input @subentry analizzatore di @subentry personalizzato Per default, @command{gawk} legge file di testo come input. Il valore della -variabile @code{RS} @`e usato per determinare la fine di un record, e subito -dopo la variabile @code{FS} (o @code{FIELDWIDTHS} o @code{FPAT}) viene usata -per suddividerlo in campi +variabile @code{RS} @`e usato per determinare la fine di un record in input, +e subito dopo la variabile @code{FS} (o @code{FIELDWIDTHS} o @code{FPAT}) +viene usata per suddividerlo in campi @iftex (@pxrefil{Leggere file}). @end iftex @@ -39965,14 +40035,41 @@ inizialmente) da @code{@var{XXX}_can_take_file()}, e quelli che sono usati da Il nome del file. @item int fd; -Un descrittore di file per il file. Se @command{gawk} riesce ad aprire -il file, il valore di @code{fd} @emph{non} sar@`a uguale a -@code{INVALID_HANDLE} [descrittore non valido]. In caso contrario, -quello sar@`a il valore. +Un descrittore di file per il file. @command{gawk} tenta di aprire +il file in lettura usando la chiamata di sistema @code{open()}. +Se il file viene effettivamente aperto, il valore di @code{fd} +@emph{non} sar@`a uguale a @code{INVALID_HANDLE} +[descrittore non valido]. +In caso contrario, il valore sar@`a quello. + +Un'estensione pu@`o decidere che non desidera usare il descrittore +di file aperto che le viene passato da @command{gawk}. +In tal caso pu@`o chiudere il file e impostare il relativo +descrittore di file @code{fd} al valore @code{INVALID_HANDLE}, +oppure ignorarlo e mantenere un suo proprio descrittore di file +nei suoi dati privati, raggiungibili usando il puntatore +@code{opaque} (vedere pi@`u sotto in questa lista). +In ogni caso, se il descrittore di file @`e valido, @emph{non} +dovrebbe essere sovrascritto con un altro valore; se lo si fa, +si causa una perdita di dati. @item struct stat sbuf; Se il descrittore di file @`e valido, @command{gawk} avr@`a riempito i campi di questa struttura invocando la chiamata di sistema @code{fstat()}. 
+Altrimenti, se la chiamata di sistema @code{lstat()} @`e disponibile, +essa verr@`a usata. Se neppure @code{lstat()} @`e disponibile, allora +verr@`a usata la chiamata di sistema @code{stat()}. + +L'ottenere informazioni relative al file consente alle estensioni di +controllare il tipo di file, anche se non sar@`a poi possibile aprirlo. +Ci@`o capita, per esempio, nei sistemi Windows, quando si tenta di +usare @code{open()} su una directory. + +Se @command{gawk} non sar@`a riuscito a ottenere le informazioni +relative al file, allora @code{sbuf} conterr@`a zeri binari. +In particolare, il codice dell'estensione pu@`o testare se il campo +@samp{sbuf.st_mode == 0}. Se @`e questo il caso, allora in +@code{sbuf} non @`e contenuta alcuna informazione. @end table La funzione @code{@var{XXX}_can_take_file()} dovrebbe esaminare i campi di @@ -40010,7 +40107,7 @@ Questo puntatore a funzione dovrebbe puntare a una funzione che crea i record in input. Tale funzione @`e il nucleo centrale dell'analizzatore di input. Il suo modo di operare @`e descritto nel testo che segue questo elenco. -@item ssize_t (*read_func)(); +@item ssize_t (*read_func)(int, void *, size_t); Questo puntatore a funzione dovrebbe puntare a una funzione che ha lo stesso comportamento della chiamata di sistema standard POSIX @code{read()}. @`E in alternativa al puntatore a @code{get_record}. Il relativo comportamento @@ -40037,14 +40134,16 @@ record in input. I parametri sono i seguenti: @table @code @item char **out -Questo @`e un puntatore a una variabile @code{char *} che @`e impostatata in modo -da puntare al record. @command{gawk} usa una sua copia locale dei dati, -quindi l'estensione deve gestire la relativa area di memoria. +Questo @`e un puntatore a una variabile @code{char *} che @`e impostata +in modo da puntare al record. @command{gawk} usa una sua copia locale dei +dati, quindi l'estensione dell'utente deve gestire la relativa area di +memoria. 
@item struct awk_input *iobuf -Questa @`e la struttura @code{awk_input_buf_t} per il file. I campi dovrebbero -essere usati per leggere i dati (@code{fd}) e per gestire lo stato privato -(@code{opaque}), se necessario. +Questa @`e la struttura @code{awk_input_buf_t} per il file. +Due dei suoi campi dovrebbero essere usati dall'estensione utente: +@code{fd} per leggere i dati, e @code{opaque} per gestire lo stato privato +se necessario. @item int *errcode Se si verifica un errore, @code{*errcode} dovrebbe essere impostato a un @@ -40056,8 +40155,8 @@ Se il concetto ``fine record'' @`e applicabile, @code{*rt_start} dovrebbe essere impostato per puntare ai dati da usare come @code{RT}, e @code{*rt_len} dovrebbe essere impostata alla lunghezza di quel campo. In caso contrario, @code{*rt_len} dovrebbe essere impostata a zero. -@command{gawk} usa una sua copia di questi dati, quindi l'estensione deve -gestire tale memoria. +Anche qui @command{gawk} usa una sua copia di questi dati, quindi +l'estensione utente deve gestire tale memoria. @item const awk_fieldwidth_info_t **field_width Se @code{field_width} non @`e @code{NULL}, @code{*field_width} sar@`a @@ -40069,6 +40168,7 @@ inoltre essa deve rimanere disponibile almeno fino alla successiva chiamata a @code{get_record} o a @code{close_func}. Si noti inoltre che @code{field_width} vale @code{NULL} quando @code{getline} sta assegnando i risultati a una variabile, e quindi un'analisi del campo non @`e necessaria. + Se l'analizzatore imposta @code{*field_width}, allora @command{gawk} usa questa descrizione per analizzare il record in input, e il valore di @code{PROCINFO["FS"]} sar@`a @code{"API"} finch@'e questo @@ -40127,15 +40227,8 @@ valore preso da una variabile @command{awk}, come fa l'estensione XML inclusa nel progetto @code{gawkextlib} (@pxref{gawkextlib}). 
In quest'ultimo caso, il codice in una regola @code{BEGINFILE} pu@`o controllare @code{FILENAME} ed @code{ERRNO} per decidere se -attivare un analizzatore di input (@pxref{BEGINFILE/ENDFILE}) oppure no. - -Un analizzatore di input va registrato usando la seguente funzione: - -@table @code -@item void register_input_parser(awk_input_parser_t *input_parser); -Registra l'analizzatore di input puntato da @code{input_parser} con -@command{gawk}. -@end table +attivare un analizzatore di input utente oppure no +(@pxref{BEGINFILE/ENDFILE}). Se si vuole ignorare il meccanismo di default per l'analisi dei campi per un determinato record, si deve riempire una struttura @@ -40182,6 +40275,14 @@ dimensione pu@`o essere usata come argomento per @code{malloc()} o in una struttura @dfn{union} per allocare spazio staticamente. Per un esempio si pu@`o vedere l'estensione di esempio @code{readdir_test}. +Un analizzatore di input va registrato usando la seguente funzione: + +@table @code +@item void register_input_parser(awk_input_parser_t *input_parser); +Registra l'analizzatore di input puntato da @code{input_parser} con +@command{gawk}. +@end table + @node Processori di output @subsubsection Registrare un processore di output @cindex personalizzato @subentry processore di output @@ -40297,11 +40398,14 @@ appropriate per fare il lavoro richiesto. La funzione @code{@var{XXX}_can_take_file()} dovrebbe decidere in base ai campi @code{nome} e @code{modo}, e a ogni altro ulteriore indicatore di stato (p.es., valori di variabili @command{awk}) adatto allo scopo. +@command{gawk} tenta di aprire in scrittura il file indicato. +L'elemento @code{fp} varr@`a @code{NULL} solo se non si riesce ad aprire +il file. Quando @command{gawk} chiama @code{@var{XXX}_take_control_of()}, la funzione dovrebbe riempire i rimanenti campi in modo opportuno, tranne che per @code{fp}, che dovrebbe essere usato -normalmente. +normalmente se il suo valore non @`e @code{NULL}. 
Il processore di output va registrato usando la seguente funzione: @@ -45425,6 +45529,10 @@ La funzione @code{getline} ridiretta @`e stata resa possibile all'interno di (@pxref{BEGINFILE/ENDFILE}). @item +Supporto per continuare dopo errori di I/O non fatali +@xref{Continuazione dopo errori}. + +@item Il comando @code{where} @`e stato aggiunto al debugger (@pxref{Stack di esecuzione}). @@ -45653,24 +45761,22 @@ persistente (PMA) @`e disponibile. @end itemize -La Versione 5.3 ha aggiunto le seguenti funzionalit@`a: +La versione 5.3 ha aggiunto le seguenti funzionalit@`a: @itemize @item -Divisione di campi per i file CSV -[Campi Separati da Virgola] +Divisione in campi per file di tipo CSV (Campi separati da virgola) (@pxref{Campi separati da virgola}). @item -La necessit@`a di utilizzare la libreria GNU @code{libsigsegv} -@`e stata rimossa da @command{gawk}. -Il valore aggiunto non @`e mai stato grande, e il suo utilizzo -creava problemi in alcuni sistemi operativi. +La possibilit@`a che @command{gawk} bufferizzi l'output a @dfn{pipe} +(@pxref{Noflush}). @item -L'elemento @code{"pma"} nel vettore -@code{PROCINFO} -(@pxref{Variabili predefinite}). +La necessit@`a di utilizzare la libreria GNU @code{libsigsegv} +@`e stata rimossa da @command{gawk}. +Il valore aggiunto relativo non @`e mai stato grande e la funzionalit@`a +causava problemi in alcuni sistemi. @end itemize @@ -49652,10 +49758,10 @@ Si veda la @inlineraw{docbook, <xref linkend="figura-generica-flusso"/>}. 
@float Figura,figura-generica-flusso @caption{Flusso generico di un programma} @ifclear SMALLPRINT -@center @image{programma-generico, , , Flusso generico di un programma} +@center @image{gawk-programma-generico, , , Flusso generico di un programma} @end ifclear @ifset SMALLPRINT -@center @image{programma-generico, 11cm, , Flusso generico di un programma} +@center @image{gawk-programma-generico, 11cm, , Flusso generico di un programma} @end ifset @end float @end ifnotdocbook @@ -49664,7 +49770,7 @@ Si veda la @inlineraw{docbook, <xref linkend="figura-generica-flusso"/>}. <figure id="figura-generica-flusso" float="0"> <title>Flusso generico di un programma</title> <mediaobject> -<imageobject role="web"><imagedata fileref="programma-generico.png" format="PNG"/></imageobject> +<imageobject role="web"><imagedata fileref="gawk-programma-generico.png" format="PNG"/></imageobject> </mediaobject> </figure> @end docbook @@ -49702,10 +49808,10 @@ come si vede nella @inlineraw{docbook, <xref linkend="figura-flusso-elaborazione @float Figura,figura-flusso-elaborazione @caption{Fasi di un programma generico} @ifclear SMALLPRINT -@center @image{flusso-elaborazione, , , Fasi di un programma generico} +@center @image{gawk-flusso-elaborazione, , , Fasi di un programma generico} @end ifclear @ifset SMALLPRINT -@center @image{flusso-elaborazione, 11cm , , Fasi di un programma generico} +@center @image{gawk-flusso-elaborazione, 11cm , , Fasi di un programma generico} @end ifset @end float @end ifnotdocbook @@ -49714,7 +49820,7 @@ come si vede nella @inlineraw{docbook, <xref linkend="figura-flusso-elaborazione <figure id="figura-flusso-elaborazione" float="0"> <title>Fasi di un programma generico</title> <mediaobject> -<imageobject role="web"><imagedata fileref="flusso-elaborazione.png" format="PNG"/></imageobject> +<imageobject role="web"><imagedata fileref="gawk-flusso-elaborazione.png" format="PNG"/></imageobject> </mediaobject> </figure> @end docbook diff --git a/doc/pm-gawk.info 
b/doc/pm-gawk.info index 5503b317..fa186620 100644 --- a/doc/pm-gawk.info +++ b/doc/pm-gawk.info @@ -1,7 +1,7 @@ -This is pm-gawk.info, produced by makeinfo version 6.8 from +This is pm-gawk.info, produced by makeinfo version 7.0.1 from pm-gawk.texi. -Copyright (C) 2022 Terence Kelly +Copyright © 2022 Terence Kelly <tpkelly@eecs.umich.edu> <tpkelly@cs.princeton.edu> <tpkelly@acm.org> @@ -11,7 +11,7 @@ Copyright (C) 2022 Terence Kelly Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with the -Invariant Sections being "Introduction" and "History", no Front-Cover +Invariant Sections being “Introduction” and “History”, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license is available at <https://www.gnu.org/licenses/fdl-1.3.html> INFO-DIR-SECTION Text creation and manipulation @@ -25,14 +25,14 @@ File: pm-gawk.info, Node: Top, Next: Introduction, Up: (dir) General Introduction ******************** -'gawk' 5.2 introduces a _persistent memory_ feature that can "remember" +‘gawk’ 5.2 introduces a _persistent memory_ feature that can “remember” script-defined variables and functions across executions; pass variables between unrelated scripts without serializing/parsing text files; and handle data sets larger than available memory plus swap. This supplementary manual provides an in-depth look at persistent-memory -'gawk'. +‘gawk’. 
-Copyright (C) 2022 Terence Kelly +Copyright © 2022 Terence Kelly <tpkelly@eecs.umich.edu> <tpkelly@cs.princeton.edu> <tpkelly@acm.org> @@ -42,7 +42,7 @@ Copyright (C) 2022 Terence Kelly Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with the -Invariant Sections being "Introduction" and "History", no Front-Cover +Invariant Sections being “Introduction” and “History”, no Front-Cover Texts, and no Back-Cover Texts. A copy of the license is available at <https://www.gnu.org/licenses/fdl-1.3.html> @@ -65,46 +65,46 @@ File: pm-gawk.info, Node: Introduction, Next: Quick Start, Prev: Top, Up: To ************** -GNU AWK ('gawk') 5.2, expected in September 2022, introduces a new +GNU AWK (‘gawk’) 5.2, expected in September 2022, introduces a new _persistent memory_ feature that makes AWK scripting easier and -sometimes improves performance. The new feature, called "pm-'gawk'," -can "remember" script-defined variables and functions across executions +sometimes improves performance. The new feature, called “pm-‘gawk’,” +can “remember” script-defined variables and functions across executions and can pass variables and functions between unrelated scripts without -serializing/parsing text files--all with near-zero fuss. pm-'gawk' does +serializing/parsing text files—all with near-zero fuss. pm-‘gawk’ does _not_ require non-volatile memory hardware nor any other exotic infrastructure; it runs on the ordinary conventional computers and operating systems that most of us have been using for decades. -The main 'gawk' documentation(1) covers the basics of the new +The main ‘gawk’ documentation(1) covers the basics of the new persistence feature. This supplementary manual provides additional -detail, tutorial examples, and a peek under the hood of pm-'gawk'. 
If -you're familiar with 'gawk' and Unix-like environments, dive straight +detail, tutorial examples, and a peek under the hood of pm-‘gawk’. If +you’re familiar with ‘gawk’ and Unix-like environments, dive straight in: - * *note Quick Start:: hits the ground running with a few keystrokes. - * *note Examples:: shows how pm-'gawk' streamlines typical AWK + • *note Quick Start:: hits the ground running with a few keystrokes. + • *note Examples:: shows how pm-‘gawk’ streamlines typical AWK scripting. - * *note Performance:: covers asymptotic efficiency, OS tuning, and + • *note Performance:: covers asymptotic efficiency, OS tuning, and more. - * *note Data Integrity:: explains how to protect data from mishaps. - * *note Acknowledgments:: thanks those who made pm-'gawk' happen. - * *note Installation:: shows where to obtain pm-'gawk'. - * *note Debugging:: explains how to handle suspected bugs. - * *note History:: traces pm-'gawk''s persistence technology. + • *note Data Integrity:: explains how to protect data from mishaps. + • *note Acknowledgments:: thanks those who made pm-‘gawk’ happen. + • *note Installation:: shows where to obtain pm-‘gawk’. + • *note Debugging:: explains how to handle suspected bugs. + • *note History:: traces pm-‘gawk’’s persistence technology. 
-You can find the latest version of this manual, and also the "director's -cut," at the web site for the persistent memory allocator used in -pm-'gawk': +You can find the latest version of this manual, and also the “director’s +cut,” at the web site for the persistent memory allocator used in +pm-‘gawk’: <http://web.eecs.umich.edu/~tpkelly/pma/> Two publications describe the persistent memory allocator and early -experiences with a pm-'gawk' prototype based on a fork of the official -'gawk' sources: - * <https://queue.acm.org/detail.cfm?id=3534855> - * +experiences with a pm-‘gawk’ prototype based on a fork of the official +‘gawk’ sources: + • <https://queue.acm.org/detail.cfm?id=3534855> + • <http://nvmw.ucsd.edu/nvmw2022-program/nvmw2022-data/nvmw2022-paper35-final_version_your_extended_abstract.pdf> @@ -116,8 +116,8 @@ Feel free to send me questions, suggestions, and experiences: ---------- Footnotes ---------- - (1) See <https://www.gnu.org/software/gawk/manual/> and 'man -gawk' and 'info gawk'. + (1) See <https://www.gnu.org/software/gawk/manual/> and ‘man +gawk’ and ‘info gawk’. File: pm-gawk.info, Node: Quick Start, Next: Examples, Prev: Introduction, Up: Top @@ -125,34 +125,34 @@ File: pm-gawk.info, Node: Quick Start, Next: Examples, Prev: Introduction, U 2 Quick Start ************* -Here's pm-'gawk' in action at the 'bash' shell prompt ('$'): +Here’s pm-‘gawk’ in action at the ‘bash’ shell prompt (‘$’): $ truncate -s 4096000 heap.pma $ export GAWK_PERSIST_FILE=heap.pma $ gawk 'BEGIN{myvar = 47}' $ gawk 'BEGIN{myvar += 7; print myvar}' 54 -First, 'truncate' creates an empty (all-zero-bytes) "heap file" where -pm-'gawk' will store script variables; its size is a multiple of the -system page size (4 KiB). 
Next, 'export' sets an environment variable -that enables pm-'gawk' to find the heap file; if 'gawk' does _not_ see +First, ‘truncate’ creates an empty (all-zero-bytes) “heap file” where +pm-‘gawk’ will store script variables; its size is a multiple of the +system page size (4 KiB). Next, ‘export’ sets an environment variable +that enables pm-‘gawk’ to find the heap file; if ‘gawk’ does _not_ see this envar, persistence is not activated. The third command runs a -one-line AWK script that initializes variable 'myvar', which will reside +one-line AWK script that initializes variable ‘myvar’, which will reside in the heap file and thereby outlive the interpreter process that -initialized it. Finally, the fourth command invokes pm-'gawk' on a -_different_ one-line script that increments and prints 'myvar'. The -output shows that pm-'gawk' has indeed "remembered" 'myvar' across -executions of unrelated scripts. (If the 'gawk' executable in your -search '$PATH' lacks the persistence feature, the output in the above -example will be '7' instead of '54'. *Note Installation::.) To disable -persistence until you want it again, prevent 'gawk' from finding the -heap file via 'unset GAWK_PERSIST_FILE'. To permanently "forget" script +initialized it. Finally, the fourth command invokes pm-‘gawk’ on a +_different_ one-line script that increments and prints ‘myvar’. The +output shows that pm-‘gawk’ has indeed “remembered” ‘myvar’ across +executions of unrelated scripts. (If the ‘gawk’ executable in your +search ‘$PATH’ lacks the persistence feature, the output in the above +example will be ‘7’ instead of ‘54’. *Note Installation::.) To disable +persistence until you want it again, prevent ‘gawk’ from finding the +heap file via ‘unset GAWK_PERSIST_FILE’. To permanently “forget” script variables, delete the heap file. 
- Toggling persistence by 'export'-ing and 'unset'-ing "ambient" envars -requires care: Forgetting to 'unset' when you no longer want persistence -can cause confusing bugs. Fortunately, 'bash' allows you to pass envars + Toggling persistence by ‘export’-ing and ‘unset’-ing “ambient” envars +requires care: Forgetting to ‘unset’ when you no longer want persistence +can cause confusing bugs. Fortunately, ‘bash’ allows you to pass envars more deliberately, on a per-command basis: $ rm heap.pma # start fresh $ unset GAWK_PERSIST_FILE # eliminate ambient envar @@ -163,10 +163,10 @@ more deliberately, on a per-command basis: 7 $ GAWK_PERSIST_FILE=heap.pma gawk 'BEGIN{myvar += 7; print myvar}' 54 -The first 'gawk' invocation sees the special envar prepended on the -command line, so it activates pm-'gawk'. The second 'gawk' invocation, +The first ‘gawk’ invocation sees the special envar prepended on the +command line, so it activates pm-‘gawk’. The second ‘gawk’ invocation, however, does _not_ see the envar and therefore does not access the -script variable stored in the heap file. The third 'gawk' invocation +script variable stored in the heap file. The third ‘gawk’ invocation does see the special envar and therefore uses the script variable from the heap file. @@ -185,8 +185,8 @@ File: pm-gawk.info, Node: Examples, Next: Performance, Prev: Quick Start, Up 3 Examples ********** -Our first example uses pm-'gawk' to streamline analysis of a prose -corpus, Mark Twain's 'Tom Sawyer' and 'Huckleberry Finn' from +Our first example uses pm-‘gawk’ to streamline analysis of a prose +corpus, Mark Twain’s ‘Tom Sawyer’ and ‘Huckleberry Finn’ from <https://gutenberg.org/files/74/74-0.txt> and <https://gutenberg.org/files/76/76-0.txt>. 
We first convert non-alphabetic characters to newlines (so each line has at most one @@ -194,10 +194,10 @@ word) and convert to lowercase: $ tr -c a-zA-Z '\n' < 74-0.txt | tr A-Z a-z > sawyer.txt $ tr -c a-zA-Z '\n' < 76-0.txt | tr A-Z a-z > finn.txt - It's easy to count word frequencies with AWK's associative arrays. -pm-'gawk' makes these arrays persistent, so we need not re-ingest the -entire corpus every time we ask a new question ("read once, analyze -happily ever after"): + It’s easy to count word frequencies with AWK’s associative arrays. +pm-‘gawk’ makes these arrays persistent, so we need not re-ingest the +entire corpus every time we ask a new question (“read once, analyze +happily ever after”): $ truncate -s 100M twain.pma $ export GAWK_PERSIST_FILE=twain.pma $ gawk '{ts[$1]++}' sawyer.txt # ingest @@ -205,48 +205,48 @@ happily ever after"): 92 11 $ gawk 'BEGIN{print ts["necktie"], ts["knife"]}' # query 2 27 -The 'truncate' command above creates a heap file large enough to store +The ‘truncate’ command above creates a heap file large enough to store all of the data it must eventually contain, with plenty of room to -spare. (As we'll see in *note Sparse Heap Files::, this isn't -wasteful.) The 'export' command ensures that subsequent 'gawk' -invocations activate pm-'gawk'. The first pm-'gawk' command stores 'Tom -Sawyer''s word frequencies in associative array 'ts[]'. Because this -array is persistent, subsequent pm-'gawk' commands can access it without +spare. (As we’ll see in *note Sparse Heap Files::, this isn’t +wasteful.) The ‘export’ command ensures that subsequent ‘gawk’ +invocations activate pm-‘gawk’. The first pm-‘gawk’ command stores ‘Tom +Sawyer’’s word frequencies in associative array ‘ts[]’. Because this +array is persistent, subsequent pm-‘gawk’ commands can access it without having to parse the input file again. - Expanding our analysis to encompass a second book is easy. 
Let's -populate a new associative array 'hf[]' with the word frequencies in -'Huckleberry Finn': + Expanding our analysis to encompass a second book is easy. Let’s +populate a new associative array ‘hf[]’ with the word frequencies in +‘Huckleberry Finn’: $ gawk '{hf[$1]++}' finn.txt -Now we can freely intermix accesses to both books' data conveniently and +Now we can freely intermix accesses to both books’ data conveniently and efficiently, without the overhead and coding fuss of repeated input parsing: $ gawk 'BEGIN{print ts["river"], hf["river"]}' 26 142 - By making AWK more interactive, pm-'gawk' invites casual -conversations with data. If we're curious what words in 'Finn' are -absent from 'Sawyer', answers (including "flapdoodle," "yellocution," -and "sockdolager") are easy to find: + By making AWK more interactive, pm-‘gawk’ invites casual +conversations with data. If we’re curious what words in ‘Finn’ are +absent from ‘Sawyer’, answers (including “flapdoodle,” “yellocution,” +and “sockdolager”) are easy to find: $ gawk 'BEGIN{for(w in hf) if (!(w in ts)) print w}' - Rumors of Twain's death may be exaggerated. If he publishes new + Rumors of Twain’s death may be exaggerated. If he publishes new books in the future, it will be easy to incorporate them into our analysis incrementally. The performance benefits of incremental processing for common AWK chores such as log file analysis are discussed in <https://queue.acm.org/detail.cfm?id=3534855> and the companion paper cited therein, and below in *note Performance::. - Exercise: The "Markov" AWK script on page 79 of Kernighan & Pike's -'The Practice of Programming' generates random text reminiscent of a + Exercise: The “Markov” AWK script on page 79 of Kernighan & Pike’s +‘The Practice of Programming’ generates random text reminiscent of a given corpus using a simple statistical modeling technique. This script -consists of a "learning" or "training" phase followed by an -output-generation phase. 
Use pm-'gawk' to de-couple the two phases and +consists of a “learning” or “training” phase followed by an +output-generation phase. Use pm-‘gawk’ to de-couple the two phases and to allow the statistical model to incrementally ingest additions to the input corpus. - Our second example considers another domain that plays to AWK's -strengths, data analysis. For simplicity we'll create two small input + Our second example considers another domain that plays to AWK’s +strengths, data analysis. For simplicity we’ll create two small input files of numeric data. $ printf '1\n2\n3\n4\n5\n' > A.dat $ printf '5\n6\n7\n8\n9\n' > B.dat @@ -262,26 +262,26 @@ statistics: $ gawk -f summary_conventional.awk A.dat B.dat min: 1 max: 9 mean: 5 - To use pm-'gawk' for the same purpose, we first create a heap file -for our AWK script variables and tell pm-'gawk' where to find it via the + To use pm-‘gawk’ for the same purpose, we first create a heap file +for our AWK script variables and tell pm-‘gawk’ where to find it via the usual environment variable: $ truncate -s 10M stats.pma $ export GAWK_PERSIST_FILE=stats.pma -pm-'gawk' requires changing the above script to ensure that 'min' and -'max' are initialized exactly once, when the heap file is first used, +pm-‘gawk’ requires changing the above script to ensure that ‘min’ and +‘max’ are initialized exactly once, when the heap file is first used, and _not_ every time the script runs. Furthermore, whereas -script-defined variables such as 'min' retain their values across -pm-'gawk' executions, built-in AWK variables such as 'NR' are reset to -zero every time pm-'gawk' runs, so we can't use them in the same way. -Here's a modified script for pm-'gawk': +script-defined variables such as ‘min’ retain their values across +pm-‘gawk’ executions, built-in AWK variables such as ‘NR’ are reset to +zero every time pm-‘gawk’ runs, so we can’t use them in the same way. +Here’s a modified script for pm-‘gawk’: $ cat summary_persistent.awk ! 
init { min = max = $1; init = 1 } min > $1 { min = $1 } max < $1 { max = $1 } { sum += $1; ++n } END { print "min: " min " max: " max " mean: " sum/n } -Note the different pattern on the first line and the introduction of 'n' -to supplant 'NR'. When used with pm-'gawk', this new initialization +Note the different pattern on the first line and the introduction of ‘n’ +to supplant ‘NR’. When used with pm-‘gawk’, this new initialization logic supports the same kind of cumulative processing that we saw in the text-analysis scenario. For example, we can ingest our input files separately: @@ -290,21 +290,21 @@ separately: $ gawk -f summary_persistent.awk B.dat min: 1 max: 9 mean: 5 -As expected, after the second pm-'gawk' invocation consumes the second +As expected, after the second pm-‘gawk’ invocation consumes the second input file, the output matches that of the non-persistent script that read both files at once. Exercise: Amend the AWK scripts above to compute the median and -mode(s) using both conventional 'gawk' and pm-'gawk'. (The median is +mode(s) using both conventional ‘gawk’ and pm-‘gawk’. (The median is the number in the middle of a sorted list; if the length of the list is even, average the two numbers at the middle. The modes are the values that occur most frequently.) - Our third and final set of examples shows that pm-'gawk' allows us to + Our third and final set of examples shows that pm-‘gawk’ allows us to bundle both script-defined data and also user-defined _functions_ in a persistent heap that may be passed freely between unrelated AWK scripts. - The following shell transcript repeatedly invokes pm-'gawk' to create + The following shell transcript repeatedly invokes pm-‘gawk’ to create and then employ a user-defined function. These separate invocations involve several different AWK scripts that communicate via the heap file. 
Each invocation can add user-defined functions and add or remove @@ -324,18 +324,18 @@ data from the heap that subsequent invocations will access. $ gawk 'BEGIN { for (i=0; i<47; i++) a[i]=i }' $ gawk 'BEGIN { print count(a) }' 47 -The first pm-'gawk' command creates user-defined function 'count()', +The first pm-‘gawk’ command creates user-defined function ‘count()’, which returns the number of entries in a given associative array; note -that variable 't' is local to 'count()', not global. The next pm-'gawk' +that variable ‘t’ is local to ‘count()’, not global. The next pm-‘gawk’ command populates a persistent associative array with three entries; not -surprisingly, the 'count()' call in the following pm-'gawk' command -finds these three entries. The next two pm-'gawk' commands respectively +surprisingly, the ‘count()’ call in the following pm-‘gawk’ command +finds these three entries. The next two pm-‘gawk’ commands respectively delete an array entry and print the reduced count, 2. The two commands after that delete the entire array and print a count of zero. Finally, -the last two pm-'gawk' commands populate the array with 47 entries and +the last two pm-‘gawk’ commands populate the array with 47 entries and count them. - The following shell script invokes pm-'gawk' repeatedly to create a + The following shell script invokes pm-‘gawk’ repeatedly to create a collection of user-defined functions that perform basic operations on quadratic polynomials: evaluation at a given point, computing the discriminant, and using the quadratic formula to find the roots. 
It @@ -368,12 +368,11 @@ File: pm-gawk.info, Node: Performance, Next: Data Integrity, Prev: Examples, ************* This chapter explains several performance advantages that result from -the implementation of persistent memory in pm-'gawk', shows how tuning +the implementation of persistent memory in pm-‘gawk’, shows how tuning the underlying operating system sometimes improves performance, and presents experimental performance measurements. To make the discussion -concrete, we use examples from a GNU/Linux system--GNU utilities atop -the Linux OS--but the principles apply to other modern operating -systems. +concrete, we use examples from a GNU/Linux system—GNU utilities atop the +Linux OS—but the principles apply to other modern operating systems. * Menu: @@ -390,52 +389,52 @@ File: pm-gawk.info, Node: Constant-Time Array Access, Next: Virtual Memory and 4.1 Constant-Time Array Access ============================== -pm-'gawk' preserves the efficiency of data access when data structures +pm-‘gawk’ preserves the efficiency of data access when data structures are created by one process and later re-used by a different process. - Consider the associative arrays used to analyze Mark Twain's books in -*note Examples::. We created arrays 'ts[]' and 'hf[]' by reading files -'sawyer.txt' and 'finn.txt'. If N denotes the total volume of data in + Consider the associative arrays used to analyze Mark Twain’s books in +*note Examples::. We created arrays ‘ts[]’ and ‘hf[]’ by reading files +‘sawyer.txt’ and ‘finn.txt’. If N denotes the total volume of data in these files, building the associative arrays typically requires time -proportional to N, or "O(N) expected time" in the lingo of asymptotic +proportional to N, or “O(N) expected time” in the lingo of asymptotic analysis. If W is the number of unique words in the input files, the size of the associative arrays will be proportional to W, or O(W). 
Accessing individual array elements requires only _constant_ or O(1) -expected time, not O(N) or O(W) time, because 'gawk' implements arrays +expected time, not O(N) or O(W) time, because ‘gawk’ implements arrays as hash tables. - The performance advantage of pm-'gawk' arises when different + The performance advantage of pm-‘gawk’ arises when different processes create and access associative arrays. Accessing an element of -a persistent array created by a previous pm-'gawk' process, as we did +a persistent array created by a previous pm-‘gawk’ process, as we did earlier in BEGIN{print ts["river"], hf["river"]}, still requires only O(1) time, which is asymptotically far superior to the alternatives. Naïvely reconstructing arrays by re-ingesting all raw inputs in every -'gawk' process that accesses the arrays would of course require O(N) -time--a profligate cost if the text corpus is large. Dumping arrays to +‘gawk’ process that accesses the arrays would of course require O(N) +time—a profligate cost if the text corpus is large. Dumping arrays to files and re-loading them as needed would reduce the preparation time for access to O(W). That can be a substantial improvement in practice; N is roughly 19 times larger than W in our Twain corpus. Nonetheless O(W) -remains far slower than pm-'gawk''s O(1). As we'll see in *note +remains far slower than pm-‘gawk’’s O(1). As we’ll see in *note Results::, the difference is not merely theoretical. - The persistent memory implementation beneath pm-'gawk' enables it to + The persistent memory implementation beneath pm-‘gawk’ enables it to avoid work proportional to N or W when accessing an element of a -persistent associative array. Under the hood, pm-'gawk' stores +persistent associative array. Under the hood, pm-‘gawk’ stores script-defined AWK variables such as associative arrays in a persistent heap laid out in a memory-mapped file (the heap file). 
When an AWK -script accesses an element of an associative array, pm-'gawk' performs a +script accesses an element of an associative array, pm-‘gawk’ performs a lookup on the corresponding hash table, which in turn accesses memory on the persistent heap. Modern operating systems implement memory-mapped files in such a way that these memory accesses trigger the bare minimum of data movement required: Only those parts of the heap file containing -needed data are "paged in" to the memory of the pm-'gawk' process. In -the worst case, the heap file is not in the file system's in-memory +needed data are “paged in” to the memory of the pm-‘gawk’ process. In +the worst case, the heap file is not in the file system’s in-memory cache, so the required pages must be faulted into memory from storage. Our asymptotic analysis of efficiency applies regardless of whether the heap file is cached or not. The entire heap file is _not_ accessed merely to access an element of a persistent associative array. - Persistent memory thus enables pm-'gawk' to offer the flexibility of + Persistent memory thus enables pm-‘gawk’ to offer the flexibility of de-coupling data ingestion from analytic queries without the fuss and overhead of serializing and loading data structures and without sacrificing constant-time access to the associative arrays that make AWK @@ -448,49 +447,49 @@ File: pm-gawk.info, Node: Virtual Memory and Big Data, Next: Sparse Heap Files =============================== Small data sets seldom spoil the delights of AWK by causing performance -troubles, with or without persistence. As the size of the 'gawk' -interpreter's internal data structures approaches the capacity of +troubles, with or without persistence. As the size of the ‘gawk’ +interpreter’s internal data structures approaches the capacity of physical memory, however, acceptable performance requires understanding modern operating systems and sometimes tuning them. 
Fortunately -pm-'gawk' offers new degrees of control for performance-conscious users +pm-‘gawk’ offers new degrees of control for performance-conscious users tackling large data sets. A terse mnemonic captures the basic principle: Precluding paging promotes peak performance and prevents perplexity. - Modern operating systems feature "virtual memory" that strives to + Modern operating systems feature “virtual memory” that strives to appear both larger than installed DRAM (which is small) and faster than -installed storage devices (which are slow). As a program's memory +installed storage devices (which are slow). As a program’s memory footprint approaches the capacity of DRAM, the virtual memory system -transparently "pages" (moves) the program's data between DRAM and a -"swap area" on a storage device. Paging can degrade performance mildly -or severely, depending on the program's memory access patterns. Random +transparently “pages” (moves) the program’s data between DRAM and a +“swap area” on a storage device. Paging can degrade performance mildly +or severely, depending on the program’s memory access patterns. Random accesses to large data structures can trigger excessive paging and -dramatic slowdown. Unfortunately, the hash tables beneath AWK's +dramatic slowdown. Unfortunately, the hash tables beneath AWK’s signature associative arrays inherently require random memory accesses, so large associative arrays can be problematic. Persistence changes the rules in our favor: The OS pages data to -pm-'gawk''s _heap file_ instead of the swap area. This won't help +pm-‘gawk’’s _heap file_ instead of the swap area. This won’t help performance much if the heap file resides in a conventional storage-backed file system. On Unix-like systems, however, we may place -the heap file in a DRAM-backed file system such as '/dev/shm/', which +the heap file in a DRAM-backed file system such as ‘/dev/shm/’, which entirely prevents paging to slow storage devices. 
Temporarily placing the heap file in such a file system is a reasonable expedient, with two caveats: First, keep in mind that DRAM-backed file systems perish when the machine reboots or crashes, so you must copy the heap file to a conventional storage-backed file system when your computation is done. -Second, pm-'gawk''s memory footprint can't exceed available DRAM if you +Second, pm-‘gawk’’s memory footprint can’t exceed available DRAM if you place the heap file in a DRAM-backed file system. Tuning OS paging parameters may improve performance if you decide to -run pm-'gawk' with a heap file in a conventional storage-backed file +run pm-‘gawk’ with a heap file in a conventional storage-backed file system. Some OSes have unhelpful default habits regarding modified -("dirty") memory backed by files. For example, the OS may write dirty +(“dirty”) memory backed by files. For example, the OS may write dirty memory pages to the heap file periodically and/or when the OS believes -that "too much" memory is dirty. Such "eager" writeback can degrade -performance noticeably and brings no benefit to pm-'gawk'. Fortunately +that “too much” memory is dirty. Such “eager” writeback can degrade +performance noticeably and brings no benefit to pm-‘gawk’. Fortunately some OSes allow paging defaults to be over-ridden so that writeback is -"lazy" rather than eager. For Linux see the discussion of the 'dirty_*' +“lazy” rather than eager. For Linux see the discussion of the ‘dirty_*’ parameters at <https://www.kernel.org/doc/html/latest/admin-guide/sysctl/vm.html>. Changing these parameters can prevent wasteful eager paging:(1) @@ -498,13 +497,13 @@ Changing these parameters can prevent wasteful eager paging:(1) $ echo 100 | sudo tee /proc/sys/vm/dirty_ratio $ echo 300000 | sudo tee /proc/sys/vm/dirty_expire_centisecs $ echo 50000 | sudo tee /proc/sys/vm/dirty_writeback_centisecs -Tuning paging parameters can help non-persistent 'gawk' as well as -pm-'gawk'. 
[Disclaimer: OS tuning is an occult art, and your mileage +Tuning paging parameters can help non-persistent ‘gawk’ as well as +pm-‘gawk’. [Disclaimer: OS tuning is an occult art, and your mileage may vary.] ---------- Footnotes ---------- - (1) The 'tee' rigmarole is explained at + (1) The ‘tee’ rigmarole is explained at <https://askubuntu.com/questions/1098059/which-is-the-right-way-to-drop-caches-in-lubuntu>. @@ -513,55 +512,55 @@ File: pm-gawk.info, Node: Sparse Heap Files, Next: Persistence versus Durabili 4.3 Sparse Heap Files ===================== -To be frugal with storage resources, pm-'gawk''s heap file should be -created as a "sparse file": a file whose logical size is larger than its +To be frugal with storage resources, pm-‘gawk’’s heap file should be +created as a “sparse file”: a file whose logical size is larger than its storage resource footprint. Modern file systems support sparse files, -which are easy to create using the 'truncate' tool shown in our +which are easy to create using the ‘truncate’ tool shown in our examples. - Let's first create a conventional _non_-sparse file using 'echo': + Let’s first create a conventional _non_-sparse file using ‘echo’: $ echo hi > dense $ ls -l dense -rw-rw-r--. 1 me me 3 Aug 5 23:08 dense $ du -h dense 4.0K dense -The 'ls' utility reports that file 'dense' is three bytes long (two for -the letters in "hi" plus one for the newline). The 'du' utility reports -that this file consumes 4 KiB of storage--one block of disk, as small as -a non-sparse file's storage footprint can be. Now let's use 'truncate' +The ‘ls’ utility reports that file ‘dense’ is three bytes long (two for +the letters in “hi” plus one for the newline). The ‘du’ utility reports +that this file consumes 4 KiB of storage—one block of disk, as small as +a non-sparse file’s storage footprint can be. Now let’s use ‘truncate’ to create a logically enormous sparse file and check its physical size: $ truncate -s 1T sparse $ ls -l sparse -rw-rw-r--. 
1 me me 1099511627776 Aug 5 22:33 sparse $ du -h sparse 0 sparse -Whereas 'ls' reports the logical file size that we expect (one TiB or 2 -raised to the power 40 bytes), 'du' reveals that the file occupies no +Whereas ‘ls’ reports the logical file size that we expect (one TiB or 2 +raised to the power 40 bytes), ‘du’ reveals that the file occupies no storage whatsoever. The file system will allocate physical storage resources beneath this file as data is written to it; reading unwritten regions of the file yields zeros. - The "pay as you go" storage cost of sparse files offers both -convenience and control for pm-'gawk' users. If your file system + The “pay as you go” storage cost of sparse files offers both +convenience and control for pm-‘gawk’ users. If your file system supports sparse files, go ahead and create lavishly capacious heap files -for pm-'gawk'. Their logical size costs nothing and persistent memory -allocation within pm-'gawk' won't fail until physical storage resources +for pm-‘gawk’. Their logical size costs nothing and persistent memory +allocation within pm-‘gawk’ won’t fail until physical storage resources beneath the file system are exhausted. But if instead you want to _prevent_ a heap file from consuming too much storage, simply set its -initial size to whatever bound you wish to enforce; it won't eat more -disk than that. Copying sparse files with GNU 'cp' creates sparse +initial size to whatever bound you wish to enforce; it won’t eat more +disk than that. Copying sparse files with GNU ‘cp’ creates sparse copies by default. File-system encryption can preclude sparse files: If the cleartext of a byte offset range within a file is all zero bytes, the corresponding -ciphertext probably shouldn't be all zeros! Encrypting at the storage +ciphertext probably shouldn’t be all zeros! Encrypting at the storage layer instead of the file system layer may offer acceptable security while still permitting file systems to implement sparse files. 
Sometimes you might prefer a dense heap file backed by pre-allocated storage resources, for example to increase the likelihood that -pm-'gawk''s internal memory allocation will succeed until the persistent -heap occupies the entire heap file. The 'fallocate' utility will do the +pm-‘gawk’’s internal memory allocation will succeed until the persistent +heap occupies the entire heap file. The ‘fallocate’ utility will do the trick: $ fallocate -l 1M mibi $ ls -l mibi @@ -577,37 +576,37 @@ File: pm-gawk.info, Node: Persistence versus Durability, Next: Experiments, P ================================= Arguably the most important general guideline for good performance in -computer systems is, "pay only for what you need."(1) To apply this -maxim to pm-'gawk' we must distinguish two concepts that are frequently +computer systems is, “pay only for what you need.”(1) To apply this +maxim to pm-‘gawk’ we must distinguish two concepts that are frequently conflated: persistence and durability.(2) (A third logically distinct concept is the subject of *note Data Integrity::.) - "Persistent" data outlive the processes that access them, but don't -necessarily last forever. For example, as explained in 'man -mq_overview', message queues are persistent because they exist until the -system shuts down. "Durable" data reside on a physical medium that + “Persistent” data outlive the processes that access them, but don’t +necessarily last forever. For example, as explained in ‘man +mq_overview’, message queues are persistent because they exist until the +system shuts down. “Durable” data reside on a physical medium that retains its contents even without continuously supplied power. For example, hard disk drives and solid state drives store durable data. Confusion arises because persistence and durability are often correlated: Data in ordinary file systems backed by HDDs or SSDs are -typically both persistent and durable. 
Familiarity with 'fsync()' and -'msync()' might lead us to believe that durability is a subset of +typically both persistent and durable. Familiarity with ‘fsync()’ and +‘msync()’ might lead us to believe that durability is a subset of persistence, but in fact the two characteristics are orthogonal: Data in the swap area are durable but not persistent; data in DRAM-backed file -systems such as '/dev/shm/' are persistent but not durable. +systems such as ‘/dev/shm/’ are persistent but not durable. Durability often costs more than persistence, so -performance-conscious pm-'gawk' users pay the added premium for +performance-conscious pm-‘gawk’ users pay the added premium for durability only when persistence alone is not sufficient. Two ways to avoid unwanted durability overheads were discussed in *note Virtual -Memory and Big Data::: Place pm-'gawk''s heap file in a DRAM-backed file +Memory and Big Data::: Place pm-‘gawk’’s heap file in a DRAM-backed file system, or disable eager writeback to the heap file. Expedients such as these enable you to remove durability overheads from the critical path of multi-stage data analyses even when you want heap files to eventually -be durable: Allow pm-'gawk' to run at full speed with persistence alone; -force the heap file to durability (using the 'cp' and 'sync' utilities +be durable: Allow pm-‘gawk’ to run at full speed with persistence alone; +force the heap file to durability (using the ‘cp’ and ‘sync’ utilities as necessary) after output has been emitted to the next stage of the -analysis and the pm-'gawk' process using the heap has terminated. +analysis and the pm-‘gawk’ process using the heap has terminated. Experimenting with synthetic data builds intuition for how persistence and durability affect performance. 
You can write a little @@ -615,17 +614,17 @@ AWK or C program to generate a stream of random text, or just cobble together a quick and dirty generator on the command line: $ openssl rand --base64 1000000 | tr -c a-zA-Z '\n' > random.dat Varying the size of random inputs can, for example, find where -performance "falls off the cliff" as pm-'gawk''s memory footprint +performance “falls off the cliff” as pm-‘gawk’’s memory footprint exceeds the capacity of DRAM and paging begins. Experiments require careful methodology, especially when the heap -file is in a storage-backed file system. Overlooking the file system's +file is in a storage-backed file system. Overlooking the file system’s DRAM cache can easily misguide interpretation of results and foil repeatability. Fortunately Linux allows us to invalidate the file -system cache and thus mimic a "cold start" condition resembling the +system cache and thus mimic a “cold start” condition resembling the immediate aftermath of a machine reboot. Accesses to ordinary files on durable storage will then be served from the storage devices, not from -cache. Read about 'sync' and '/proc/sys/vm/drop_caches' at +cache. Read about ‘sync’ and ‘/proc/sys/vm/drop_caches’ at <https://www.kernel.org/doc/html/latest/admin-guide/sysctl/vm.html>. ---------- Footnotes ---------- @@ -635,11 +634,11 @@ Certain well-known textbook algorithms continue to grind away fruitlessly long after having computed all of their output. See <https://queue.acm.org/detail.cfm?id=3424304>. - (2) In recent years the term "persistent memory" has sometimes been -used to denote novel byte-addressable non-volatile memory hardware--an + (2) In recent years the term “persistent memory” has sometimes been +used to denote novel byte-addressable non-volatile memory hardware—an unfortunate practice that contradicts sensible long-standing convention and causes needless confusion. NVM provides durability. 
Persistent -memory is a software abstraction that doesn't require NVM. See +memory is a software abstraction that doesn’t require NVM. See <https://queue.acm.org/detail.cfm?id=3358957>. @@ -648,7 +647,7 @@ File: pm-gawk.info, Node: Experiments, Next: Results, Prev: Persistence versu 4.5 Experiments =============== -The C-shell ('csh') script listed below illustrates concepts and +The C-shell (‘csh’) script listed below illustrates concepts and implements tips presented in this chapter. It produced the results discussed in *note Results:: in roughly 20 minutes on an aging laptop. You can cut and paste the code listing below into a file, or download it @@ -658,31 +657,31 @@ from <http://web.eecs.umich.edu/~tpkelly/pma/>. word frequency queries over a text corpus: The naïve approach of reading the corpus into an associative array for every query; manually dumping a text representation of the word-frequency table and manually loading it -prior to a query; using 'gawk''s 'rwarray' extension to dump and load an -associative array; and using pm-'gawk' to maintain a persistent +prior to a query; using ‘gawk’’s ‘rwarray’ extension to dump and load an +associative array; and using pm-‘gawk’ to maintain a persistent associative array. - Comments at the top explain prerequisites. Lines 8-10 set input + Comments at the top explain prerequisites. Lines 8–10 set input parameters: the directory where tests are run and where files including the heap file are held, the off-the-shelf timer used to measure run times and other performance characteristics such as peak memory usage, -and the size of the input. The default input size results in pm-'gawk' +and the size of the input. The default input size results in pm-‘gawk’ memory footprints under 3 GiB, which is large enough for interesting -results and small enough to fit in DRAM and avoid paging on today's -computers. Lines 13-14 define a homebrew timer. +results and small enough to fit in DRAM and avoid paging on today’s +computers. 
Lines 13–14 define a homebrew timer. Two sections of the script are relevant if the default run directory -is changed from '/dev/shm/' to a directory in a conventional -storage-backed file system: Lines 15-17 define the mechanism for -clearing file data cached in DRAM; lines 23-30 set Linux kernel +is changed from ‘/dev/shm/’ to a directory in a conventional +storage-backed file system: Lines 15–17 define the mechanism for +clearing file data cached in DRAM; lines 23–30 set Linux kernel parameters to discourage eager paging. - Lines 37-70 spit out, compile, and run a little C program to generate + Lines 37–70 spit out, compile, and run a little C program to generate a random text corpus. This program is fast, flexible, and deterministic, generating the same random output given the same parameters. - Lines 71-100 run the four different AWK approaches on the same random + Lines 71–100 run the four different AWK approaches on the same random input, reporting separately the time to build and to query the associative array containing word frequencies. @@ -808,7 +807,7 @@ results. Keep in mind that performance measurements are often sensitive to seemingly irrelevant factors. For example, the program that runs first may have the advantage of a cooler CPU; later contestants may start with a hot CPU and consequent clock throttling by a modern -processor's thermal regulation apparatus. Very generally, performance +processor’s thermal regulation apparatus. Very generally, performance measurement is a notoriously tricky business. For scripting, whose main motive is convenience rather than speed, the proper role for performance measurements is to qualitatively test hypotheses such as those that @@ -833,32 +832,32 @@ Constant-Time Array Access::. All four approaches require roughly four minutes to read the synthetic input data. 
The naïve approach must do this every time it performs a query, but the other three build an associative array to support queries and separately serve such queries. -The 'freqtbl' and 'rwarray' approaches build an associative array of +The ‘freqtbl’ and ‘rwarray’ approaches build an associative array of word frequencies, serialize it to an intermediate file, and then read the entire intermediate file prior to serving queries; the former does -this manually and the latter uses a 'gawk' extension. Both can serve -queries in roughly ten seconds, not four minutes. As we'd expect from +this manually and the latter uses a ‘gawk’ extension. Both can serve +queries in roughly ten seconds, not four minutes. As we’d expect from the asymptotic analysis, performing work proportional to the number of words is preferable to work proportional to the size of the raw input -corpus: O(W) time is faster than O(N). And as we'd expect, pm-'gawk''s +corpus: O(W) time is faster than O(N). And as we’d expect, pm-‘gawk’’s constant-time queries are faster still, by roughly two orders of -magnitude. For the computations considered here, pm-'gawk' makes the +magnitude. For the computations considered here, pm-‘gawk’ makes the difference between blink-of-an-eye interactive queries and response -times long enough for the user's mind to wander. +times long enough for the user’s mind to wander. - Whereas 'freqtbl' and 'rwarray' reconstruct an associative array -prior to accessing an individual element, pm-'gawk' stores a ready-made -associative array in persistent memory. That's why its intermediate + Whereas ‘freqtbl’ and ‘rwarray’ reconstruct an associative array +prior to accessing an individual element, pm-‘gawk’ stores a ready-made +associative array in persistent memory. 
That’s why its intermediate file (the heap file) is much larger than the other two intermediate -files, why the heap file is nearly as large as pm-'gawk''s peak memory +files, why the heap file is nearly as large as pm-‘gawk’’s peak memory footprint while building the persistent array, and why its memory footprint is very small while serving a query that accesses a single array element. The upside of the large heap file is O(1) access instead -of O(W)--a classic time-space tradeoff. If storage is a scarce -resource, all three intermediate files can be compressed, 'freqtbl' by a -factor of roughly 2.7, 'rwarray' by roughly 5.6x, and pm-'gawk' by -roughly 11x using 'xz'. Compression is CPU-intensive and slow, another -time-space tradeoff. +of O(W)—a classic time-space tradeoff. If storage is a scarce resource, +all three intermediate files can be compressed, ‘freqtbl’ by a factor of +roughly 2.7, ‘rwarray’ by roughly 5.6x, and pm-‘gawk’ by roughly 11x +using ‘xz’. Compression is CPU-intensive and slow, another time-space +tradeoff. File: pm-gawk.info, Node: Data Integrity, Next: Acknowledgments, Prev: Performance, Up: Top @@ -871,7 +870,7 @@ command-line typos can harm your data, but precautions can mitigate these risks. In scripting scenarios it usually suffices to create safe backups of important files at appropriate times. As simple as this sounds, care is needed to achieve genuine protection and to reduce the -costs of backups. Here's a prudent yet frugal way to back up a heap +costs of backups. Here’s a prudent yet frugal way to back up a heap file between uses: $ backup_base=heap_bk_`date +%s` $ cp --reflink=always heap.pma $backup_base.pma @@ -888,49 +887,49 @@ Timestamps in backup filenames make it easy to find the most recent copy if the heap file is damaged, even if last-mod metadata are inadvertently altered. 
- The 'cp' command's '--reflink' option reduces both the storage + The ‘cp’ command’s ‘--reflink’ option reduces both the storage footprint of the copy and the time required to make it. Just as sparse -files provide "pay as you go" storage footprints, reflink copying offers -"pay as you _change_" storage costs.(1) A reflink copy shares storage +files provide “pay as you go” storage footprints, reflink copying offers +“pay as you _change_” storage costs.(1) A reflink copy shares storage with the original file. The file system ensures that subsequent changes -to either file don't affect the other. Reflink copying is not available +to either file don’t affect the other. Reflink copying is not available on all file systems; XFS, BtrFS, and OCFS2 currently support it.(2) Fortunately you can install, say, an XFS file system _inside an ordinary -file_ on some other file system, such as 'ext4'.(3) +file_ on some other file system, such as ‘ext4’.(3) - After creating a backup copy of the heap file we use 'sync' to force + After creating a backup copy of the heap file we use ‘sync’ to force it down to durable media. Otherwise the copy may reside only in -volatile DRAM memory--the file system's cache--where an OS crash or -power failure could corrupt it.(4) After 'sync'-ing the backup we -create and 'sync' a "success indicator" file with extension '.done' to -address a nasty corner case: Power may fail _while_ a backup is being -copied from the primary heap file, leaving either file, or both, corrupt -on storage--a particularly worrisome possibility for jobs that run -unattended. 
Upon reboot, each '.done' file attests that the +volatile DRAM memory—the file system’s cache—where an OS crash or power +failure could corrupt it.(4) After ‘sync’-ing the backup we create and +‘sync’ a “success indicator” file with extension ‘.done’ to address a +nasty corner case: Power may fail _while_ a backup is being copied from +the primary heap file, leaving either file, or both, corrupt on +storage—a particularly worrisome possibility for jobs that run +unattended. Upon reboot, each ‘.done’ file attests that the corresponding backup succeeded, making it easy to identify the most recent successful backup. - Finally, if you're serious about tolerating failures you must "train -as you would fight" by testing your hardware/software stack against + Finally, if you’re serious about tolerating failures you must “train +as you would fight” by testing your hardware/software stack against realistic failures. For realistic power-failure testing, see <https://queue.acm.org/detail.cfm?id=3400902>. ---------- Footnotes ---------- (1) The system call that implements reflink copying is described in -'man ioctl_ficlone'. +‘man ioctl_ficlone’. - (2) The '--reflink' option creates copies as sparse as the original. -If reflink copying is not available, '--sparse=always' should be used. + (2) The ‘--reflink’ option creates copies as sparse as the original. +If reflink copying is not available, ‘--sparse=always’ should be used. (3) See <https://www.usenix.org/system/files/login/articles/login_winter19_08_kelly.pdf>. - (4) On some OSes 'sync' provides very weak guarantees, but on Linux -'sync' returns only after all file system data are flushed down to -durable storage. If your 'sync' is unreliable, write a little C program -that calls 'fsync()' to flush a file. To be safe, also call 'fsync()' -on every enclosing directory on the file's 'realpath()' up to the root. 
+ (4) On some OSes ‘sync’ provides very weak guarantees, but on Linux +‘sync’ returns only after all file system data are flushed down to +durable storage. If your ‘sync’ is unreliable, write a little C program +that calls ‘fsync()’ to flush a file. To be safe, also call ‘fsync()’ +on every enclosing directory on the file’s ‘realpath()’ up to the root. File: pm-gawk.info, Node: Acknowledgments, Next: Installation, Prev: Data Integrity, Up: Top @@ -938,18 +937,18 @@ File: pm-gawk.info, Node: Acknowledgments, Next: Installation, Prev: Data Int 6 Acknowledgments ***************** -Haris Volos, Zi Fan Tan, and Jianan Li developed a persistent 'gawk' -prototype based on a fork of the 'gawk' source. Advice from 'gawk' +Haris Volos, Zi Fan Tan, and Jianan Li developed a persistent ‘gawk’ +prototype based on a fork of the ‘gawk’ source. Advice from ‘gawk’ maintainer Arnold Robbins to me, which I forwarded to them, proved very -helpful. Robbins moreover implemented, documented, and tested pm-'gawk' -for the official version of 'gawk'; along the way he suggested numerous -improvements for the 'pma' memory allocator beneath pm-'gawk'. Corinna -Vinschen suggested other improvements to 'pma' and tested pm-'gawk' on +helpful. Robbins moreover implemented, documented, and tested pm-‘gawk’ +for the official version of ‘gawk’; along the way he suggested numerous +improvements for the ‘pma’ memory allocator beneath pm-‘gawk’. Corinna +Vinschen suggested other improvements to ‘pma’ and tested pm-‘gawk’ on Cygwin. Nelson H. F. Beebe provided access to Solaris machines for testing. Robbins, Volos, Li, Tan, Jon Bentley, and Hans Boehm reviewed drafts of this user manual and provided useful feedback. Bentley suggested the min/max/mean example in *note Examples::, and also the -exercise of making Kernighan & Pike's "Markov" script persistent. Volos +exercise of making Kernighan & Pike’s “Markov” script persistent. 
Volos provided and tested the advice on tuning OS parameters in *note Virtual Memory and Big Data::. Stan Park provided insights about virtual memory, file systems, and utilities. @@ -960,16 +959,16 @@ File: pm-gawk.info, Node: Installation, Next: Debugging, Prev: Acknowledgment Appendix A Installation *********************** -'gawk' 5.2 featuring persistent memory is expected to be released in +‘gawk’ 5.2 featuring persistent memory is expected to be released in September 2022; look for it at <http://ftp.gnu.org/gnu/gawk/>. If 5.2 is not released yet, the master git branch is available at <http://git.savannah.gnu.org/cgit/gawk.git/snapshot/gawk-master.tar.gz>. -Unpack the tarball, run './bootstrap.sh', './configure', 'make', and -'make check', then try some of the examples presented earlier. In the -normal course of events, 5.2 and later 'gawk' releases featuring -pm-'gawk' will appear in the software package management systems of -major GNU/Linux distros. Eventually pm-'gawk' will be available in the -default 'gawk' on such systems. +Unpack the tarball, run ‘./bootstrap.sh’, ‘./configure’, ‘make’, and +‘make check’, then try some of the examples presented earlier. In the +normal course of events, 5.2 and later ‘gawk’ releases featuring +pm-‘gawk’ will appear in the software package management systems of +major GNU/Linux distros. Eventually pm-‘gawk’ will be available in the +default ‘gawk’ on such systems. File: pm-gawk.info, Node: Debugging, Next: History, Prev: Installation, Up: Top @@ -977,32 +976,32 @@ File: pm-gawk.info, Node: Debugging, Next: History, Prev: Installation, Up: Appendix B Debugging ******************** -For bugs unrelated to persistence, see the 'gawk' documentation, e.g., -'GAWK: Effective AWK Programming', available at +For bugs unrelated to persistence, see the ‘gawk’ documentation, e.g., +‘GAWK: Effective AWK Programming’, available at <https://www.gnu.org/software/gawk/manual/>. 
- If pm-'gawk' doesn't behave as you expect, first consider whether -you're using the heap file that you intend; using the wrong heap file is + If pm-‘gawk’ doesn’t behave as you expect, first consider whether +you’re using the heap file that you intend; using the wrong heap file is a common mistake. Other fertile sources of bugs for newcomers are the -fact that a 'BEGIN' block is executed every time pm-'gawk' runs, which -isn't always what you really want, and the fact that built-in AWK -variables such as 'NR' are always reset to zero every time the +fact that a ‘BEGIN’ block is executed every time pm-‘gawk’ runs, which +isn’t always what you really want, and the fact that built-in AWK +variables such as ‘NR’ are always reset to zero every time the interpreter runs. See the discussion of initialization surrounding the min/max/mean script in *note Examples::. - If you suspect a persistence-related bug in pm-'gawk', you can set an -environment variable that will cause its persistent heap module, 'pma', -to emit more verbose error messages; for details see the main 'gawk' + If you suspect a persistence-related bug in pm-‘gawk’, you can set an +environment variable that will cause its persistent heap module, ‘pma’, +to emit more verbose error messages; for details see the main ‘gawk’ documentation. - Programmers: You can re-compile 'gawk' with assertions enabled, which -will trigger extensive integrity checks within 'pma'. Ensure that -'pma.c' is compiled _without_ the '-DNDEBUG' flag when 'make' builds -'gawk'. Run the resulting executable on small inputs, because the + Programmers: You can re-compile ‘gawk’ with assertions enabled, which +will trigger extensive integrity checks within ‘pma’. Ensure that +‘pma.c’ is compiled _without_ the ‘-DNDEBUG’ flag when ‘make’ builds +‘gawk’. Run the resulting executable on small inputs, because the integrity checks can be very slow. If assertions fail, that likely -indicates bugs somewhere in pm-'gawk'. 
Report such bugs to me (Terence -Kelly) and also following the procedures in the main 'gawk' -documentation. Specify what version of 'gawk' you're using, and try to +indicates bugs somewhere in pm-‘gawk’. Report such bugs to me (Terence +Kelly) and also following the procedures in the main ‘gawk’ +documentation. Specify what version of ‘gawk’ you’re using, and try to provide a small and simple script that reliably reproduces the bug. @@ -1011,43 +1010,43 @@ File: pm-gawk.info, Node: History, Prev: Debugging, Up: Top Appendix C History ****************** -The pm-'gawk' persistence feature is based on a new persistent memory -allocator, 'pma', whose design is described in +The pm-‘gawk’ persistence feature is based on a new persistent memory +allocator, ‘pma’, whose design is described in <https://queue.acm.org/detail.cfm?id=3534855>. It is instructive to -trace the evolutionary paths that led to 'pma' and pm-'gawk'. +trace the evolutionary paths that led to ‘pma’ and pm-‘gawk’. I wrote many AWK scripts during my dissertation research on Web caching twenty years ago, most of which processed log files from Web -servers and Web caches. Persistent 'gawk' would have made these scripts +servers and Web caches. Persistent ‘gawk’ would have made these scripts smaller, faster, and easier to write, but at the time I was unable even -to imagine that pm-'gawk' is possible. So I wrote a lot of bothersome, +to imagine that pm-‘gawk’ is possible. So I wrote a lot of bothersome, inefficient code that manually dumped and re-loaded AWK script variables to and from text files. A decade would pass before my colleagues and I began to connect the dots that make persistent scripting possible, and a -further decade would pass before pm-'gawk' came together. +further decade would pass before pm-‘gawk’ came together. 
Circa 2011 while working at HP Labs I developed a fault-tolerant -distributed computing platform called "Ken," which contained a -persistent memory allocator that resembles a simplified 'pma': It -presented a 'malloc()'-like C interface and it allocated memory from a +distributed computing platform called “Ken,” which contained a +persistent memory allocator that resembles a simplified ‘pma’: It +presented a ‘malloc()’-like C interface and it allocated memory from a file-backed memory mapping. Experience with Ken convinced me that the software abstraction of persistent memory offers important attractions compared with the alternatives for managing persistent data (e.g., -relational databases and key-value stores). Unfortunately, Ken's -allocator is so deeply intertwined with the rest of Ken that it's -essentially inseparable; to enjoy the benefits of Ken's persistent -memory, one must "buy in" to a larger and more complicated value -proposition. Whatever its other virtues might be, Ken isn't ideal for +relational databases and key-value stores). Unfortunately, Ken’s +allocator is so deeply intertwined with the rest of Ken that it’s +essentially inseparable; to enjoy the benefits of Ken’s persistent +memory, one must “buy in” to a larger and more complicated value +proposition. Whatever its other virtues might be, Ken isn’t ideal for showcasing the benefits of persistent memory in isolation. Another entangled aspect of Ken was a crash-tolerance mechanism that, in retrospect, can be viewed as a user-space implementation of -failure-atomic 'msync()'. The first post-Ken disentanglement effort +failure-atomic ‘msync()’. The first post-Ken disentanglement effort isolated the crash-tolerance mechanism and implemented it in the Linux -kernel, calling the result "failure-atomic 'msync()'" (FAMS). FAMS -strengthens the semantics of ordinary standard 'msync()' by guaranteeing +kernel, calling the result “failure-atomic ‘msync()’” (FAMS). 
FAMS +strengthens the semantics of ordinary standard ‘msync()’ by guaranteeing that the durable state of a memory-mapped file always reflects the most -recent successful 'msync()' call, even in the presence of failures such +recent successful ‘msync()’ call, even in the presence of failures such as power outages and OS or application crashes. The original Linux kernel FAMS prototype is described in a paper by Park et al. in EuroSys 2013. My colleagues and I subsequently implemented FAMS in several @@ -1055,39 +1054,39 @@ different ways including in file systems (FAST 2015) and user-space libraries. My most recent FAMS implementation, which leverages the reflink copying feature described elsewhere in this manual, is now the foundation of a new crash-tolerance feature in the venerable and -ubiquitous GNU 'dbm' ('gdbm') database +ubiquitous GNU ‘dbm’ (‘gdbm’) database (<https://queue.acm.org/detail.cfm?id=3487353>). In recent years my attention has returned to the advantages of persistent memory programming, lately a hot topic thanks to the commercial availability of byte-addressable non-volatile memory hardware -(which, confusingly, is nowadays marketed as "persistent memory"). The +(which, confusingly, is nowadays marketed as “persistent memory”). The software abstraction of persistent memory and the corresponding programming style, however, are perfectly compatible with _conventional_ -computers--machines with neither non-volatile memory nor any other +computers—machines with neither non-volatile memory nor any other special hardware or software. I wrote a few papers making this point, for example <https://queue.acm.org/detail.cfm?id=3358957>. In early 2022 I wrote a new stand-alone persistent memory allocator, -'pma', to make persistent memory programming easy on conventional -hardware. The 'pma' interface is compatible with 'malloc()' and, unlike -Ken's allocator, 'pma' is not coupled to a particular crash-tolerance -mechanism. 
Using 'pma' is easy and, at least to some, enjoyable. +‘pma’, to make persistent memory programming easy on conventional +hardware. The ‘pma’ interface is compatible with ‘malloc()’ and, unlike +Ken’s allocator, ‘pma’ is not coupled to a particular crash-tolerance +mechanism. Using ‘pma’ is easy and, at least to some, enjoyable. Ken had been integrated into prototype forks of both the V8 JavaScript interpreter and a Scheme interpreter, so it was natural to -consider whether 'pma' might similarly enhance an interpreted scripting +consider whether ‘pma’ might similarly enhance an interpreted scripting language. GNU AWK was a natural choice because the source code is -orderly and because 'gawk' has a single primary maintainer with an open +orderly and because ‘gawk’ has a single primary maintainer with an open mind regarding new features. Jianan Li, Zi Fan Tan, Haris Volos, and I began considering -persistence for 'gawk' in late 2021. While I was writing 'pma', they -prototyped pm-'gawk' in a fork of the 'gawk' source. Experience with +persistence for ‘gawk’ in late 2021. While I was writing ‘pma’, they +prototyped pm-‘gawk’ in a fork of the ‘gawk’ source. Experience with the prototype confirmed the expected convenience and efficiency benefits -of pm-'gawk', and by spring 2022 Arnold Robbins was implementing -persistence in the official version of 'gawk'. The persistence feature -in official 'gawk' differs slightly from the prototype: The former uses +of pm-‘gawk’, and by spring 2022 Arnold Robbins was implementing +persistence in the official version of ‘gawk’. The persistence feature +in official ‘gawk’ differs slightly from the prototype: The former uses an environment variable to pass the heap file name to the interpreter whereas the latter uses a mandatory command-line option. In many respects, however, the two implementations are similar. 
A description @@ -1096,43 +1095,43 @@ of the prototype, including performance measurements, is available at - I enjoy several aspects of pm-'gawk'. It's unobtrusive; as you gain + I enjoy several aspects of pm-‘gawk’. It’s unobtrusive; as you gain familiarity and experience, it fades into the background of your -scripting. It's simple in both concept and implementation, and more +scripting. It’s simple in both concept and implementation, and more importantly it simplifies your scripts; much of its value is measured not in the code it enables you to write but rather in the code it lets -you discard. It's all that I needed for my dissertation research twenty +you discard. It’s all that I needed for my dissertation research twenty years ago, and more. Anecdotally, it appears to inspire creativity in -early adopters, who have devised uses that pm-'gawk''s designers never -anticipated. I'm curious to see what new purposes you find for it. +early adopters, who have devised uses that pm-‘gawk’’s designers never +anticipated. I’m curious to see what new purposes you find for it. 
Tag Table: -Node: Top806 -Node: Introduction2008 -Ref: Introduction-Footnote-14345 -Node: Quick Start4441 -Node: Examples7232 -Node: Performance16128 -Node: Constant-Time Array Access16832 -Node: Virtual Memory and Big Data20122 -Ref: Virtual Memory and Big Data-Footnote-123673 -Node: Sparse Heap Files23809 -Node: Persistence versus Durability26819 -Ref: Persistence versus Durability-Footnote-130216 -Ref: Persistence versus Durability-Footnote-230462 -Node: Experiments30856 -Node: Results42567 -Node: Data Integrity46139 -Ref: Data Integrity-Footnote-148945 -Ref: Data Integrity-Footnote-249038 -Ref: Data Integrity-Footnote-349182 -Ref: Data Integrity-Footnote-449276 -Node: Acknowledgments49631 -Node: Installation50789 -Node: Debugging51583 -Node: History53252 +Node: Top815 +Node: Introduction2036 +Ref: Introduction-Footnote-14464 +Node: Quick Start4568 +Node: Examples7485 +Node: Performance16627 +Node: Constant-Time Array Access17337 +Node: Virtual Memory and Big Data20704 +Ref: Virtual Memory and Big Data-Footnote-124341 +Node: Sparse Heap Files24481 +Node: Persistence versus Durability27580 +Ref: Persistence versus Durability-Footnote-131060 +Ref: Persistence versus Durability-Footnote-231306 +Node: Experiments31707 +Node: Results43458 +Node: Data Integrity47097 +Ref: Data Integrity-Footnote-149964 +Ref: Data Integrity-Footnote-250061 +Ref: Data Integrity-Footnote-350213 +Ref: Data Integrity-Footnote-450307 +Node: Acknowledgments50688 +Node: Installation51888 +Node: Debugging52718 +Node: History54467 End Tag Table diff --git a/doc/wordlist b/doc/wordlist index e4563fef..5342b675 100644 --- a/doc/wordlist +++ b/doc/wordlist @@ -1128,10 +1128,12 @@ ifdef ifdocbook ifhtml ifinfo +iflatex ifndef ifnotdocbook ifnothtml ifnotinfo +ifnotlatex ifnotplaintext ifnottex ifnotxml @@ -1691,6 +1693,7 @@ strftime strlen strnum strnums +strptime strtod strtonum struct @@ -1749,6 +1752,7 @@ thisopt thispage thrudvang tid +timeval timex titlepage tlines |