diff options
Diffstat (limited to 'doc')
-rw-r--r-- | doc/Makefile.am | 33 | ||||
-rw-r--r-- | doc/Makefile.in | 1497 | ||||
-rw-r--r-- | doc/config.texi | 9 | ||||
-rwxr-xr-x | doc/groupify.sed | 59 | ||||
-rw-r--r-- | doc/s-texi | 1 | ||||
-rw-r--r-- | doc/sed-in.texi | 4187 | ||||
-rw-r--r-- | doc/sed.1 | 411 | ||||
-rw-r--r-- | doc/sed.info | 2612 | ||||
-rw-r--r-- | doc/sed.texi | 4356 | ||||
-rw-r--r-- | doc/sed.x | 333 | ||||
-rw-r--r-- | doc/stamp-vti | 4 | ||||
-rw-r--r-- | doc/version.texi | 4 |
12 files changed, 13506 insertions, 0 deletions
diff --git a/doc/Makefile.am b/doc/Makefile.am new file mode 100644 index 0000000..af6975c --- /dev/null +++ b/doc/Makefile.am @@ -0,0 +1,33 @@ +## Process this file with automake to produce Makefile.in +info_TEXINFOS = sed.texi +sed_TEXINFOS = config.texi version.texi +dist_man_MANS = sed.1 +dist_noinst_DATA = sed.x sed-in.texi s-texi +dist_noinst_SCRIPTS = groupify.sed +TEXI2DVI = $(top_srcdir)/build-aux/texi2dvi --expand +HELP2MAN = $(top_srcdir)/build-aux/help2man +SED = $(top_builddir)/sed/sed + +AM_MAKEINFOHTMLFLAGS = --no-split + +# To produce better quality output, in the example sed +# scripts we group comments with lines following them; +# since maintaining the "@group...@end group" manually +# is a burden, we do this automatically +$(srcdir)/sed.texi: $(srcdir)/s-texi +$(srcdir)/s-texi: sed-in.texi $(srcdir)/groupify.sed + sed -nf $(srcdir)/groupify.sed \ + < $(srcdir)/sed-in.texi > $(srcdir)/sed-tmp.texi + if cmp $(srcdir)/sed.texi $(srcdir)/sed-tmp.texi; then \ + rm -f $(srcdir)/sed-tmp.texi; \ + else \ + mv -f $(srcdir)/sed-tmp.texi $(srcdir)/sed.texi; \ + fi + echo stamp > $(srcdir)/s-texi + +sed.1: $(top_srcdir)/sed/sed.c $(top_srcdir)/configure.ac $(srcdir)/sed.x + $(HELP2MAN) --name "stream editor for filtering and transforming text" \ + -p sed --include $(srcdir)/sed.x -o $(srcdir)/sed.1 $(SED) + +dist-hook: + touch $(distdir)/sed.1 diff --git a/doc/Makefile.in b/doc/Makefile.in new file mode 100644 index 0000000..84e07d1 --- /dev/null +++ b/doc/Makefile.in @@ -0,0 +1,1497 @@ +# Makefile.in generated by automake 1.12.2 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2012 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + + +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = doc +DIST_COMMON = $(dist_man_MANS) $(dist_noinst_DATA) \ + $(dist_noinst_SCRIPTS) $(sed_TEXINFOS) $(srcdir)/Makefile.am \ + $(srcdir)/Makefile.in $(srcdir)/stamp-vti \ + $(srcdir)/version.texi $(top_srcdir)/build-aux/mdate-sh \ + $(top_srcdir)/build-aux/texinfo.tex +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/00gnulib.m4 \ + $(top_srcdir)/m4/acl.m4 $(top_srcdir)/m4/alloca.m4 \ + $(top_srcdir)/m4/btowc.m4 $(top_srcdir)/m4/canonicalize.m4 \ + $(top_srcdir)/m4/codeset.m4 $(top_srcdir)/m4/configmake.m4 \ + $(top_srcdir)/m4/dirname.m4 \ + $(top_srcdir)/m4/double-slash-root.m4 \ + $(top_srcdir)/m4/eealloc.m4 $(top_srcdir)/m4/errno_h.m4 \ + $(top_srcdir)/m4/error.m4 $(top_srcdir)/m4/extensions.m4 \ + $(top_srcdir)/m4/extern-inline.m4 $(top_srcdir)/m4/fcntl-o.m4 \ + $(top_srcdir)/m4/fcntl_h.m4 
$(top_srcdir)/m4/fstat.m4 \ + $(top_srcdir)/m4/fwriting.m4 $(top_srcdir)/m4/getdelim.m4 \ + $(top_srcdir)/m4/getopt.m4 $(top_srcdir)/m4/gettext.m4 \ + $(top_srcdir)/m4/gettimeofday.m4 $(top_srcdir)/m4/glibc21.m4 \ + $(top_srcdir)/m4/gnulib-common.m4 \ + $(top_srcdir)/m4/gnulib-comp.m4 $(top_srcdir)/m4/iconv.m4 \ + $(top_srcdir)/m4/include_next.m4 \ + $(top_srcdir)/m4/intlmacosx.m4 $(top_srcdir)/m4/langinfo_h.m4 \ + $(top_srcdir)/m4/largefile.m4 $(top_srcdir)/m4/lib-ld.m4 \ + $(top_srcdir)/m4/lib-link.m4 $(top_srcdir)/m4/lib-prefix.m4 \ + $(top_srcdir)/m4/localcharset.m4 $(top_srcdir)/m4/locale-fr.m4 \ + $(top_srcdir)/m4/locale-ja.m4 $(top_srcdir)/m4/locale-zh.m4 \ + $(top_srcdir)/m4/locale_h.m4 $(top_srcdir)/m4/localeconv.m4 \ + $(top_srcdir)/m4/longlong.m4 $(top_srcdir)/m4/lstat.m4 \ + $(top_srcdir)/m4/malloc.m4 $(top_srcdir)/m4/malloca.m4 \ + $(top_srcdir)/m4/mbrlen.m4 $(top_srcdir)/m4/mbrtowc.m4 \ + $(top_srcdir)/m4/mbsinit.m4 $(top_srcdir)/m4/mbstate_t.m4 \ + $(top_srcdir)/m4/mbtowc.m4 $(top_srcdir)/m4/memchr.m4 \ + $(top_srcdir)/m4/mkostemp.m4 $(top_srcdir)/m4/mmap-anon.m4 \ + $(top_srcdir)/m4/msvc-inval.m4 \ + $(top_srcdir)/m4/msvc-nothrow.m4 $(top_srcdir)/m4/multiarch.m4 \ + $(top_srcdir)/m4/nl_langinfo.m4 $(top_srcdir)/m4/nls.m4 \ + $(top_srcdir)/m4/nocrash.m4 $(top_srcdir)/m4/off_t.m4 \ + $(top_srcdir)/m4/pathmax.m4 $(top_srcdir)/m4/po.m4 \ + $(top_srcdir)/m4/progtest.m4 $(top_srcdir)/m4/quote.m4 \ + $(top_srcdir)/m4/quotearg.m4 $(top_srcdir)/m4/readlink.m4 \ + $(top_srcdir)/m4/realloc.m4 $(top_srcdir)/m4/regex.m4 \ + $(top_srcdir)/m4/rename.m4 $(top_srcdir)/m4/rmdir.m4 \ + $(top_srcdir)/m4/selinux-context-h.m4 \ + $(top_srcdir)/m4/selinux-selinux-h.m4 \ + $(top_srcdir)/m4/ssize_t.m4 $(top_srcdir)/m4/stat.m4 \ + $(top_srcdir)/m4/stdarg.m4 $(top_srcdir)/m4/stdbool.m4 \ + $(top_srcdir)/m4/stddef_h.m4 $(top_srcdir)/m4/stdint.m4 \ + $(top_srcdir)/m4/stdio_h.m4 $(top_srcdir)/m4/stdlib_h.m4 \ + $(top_srcdir)/m4/strerror.m4 $(top_srcdir)/m4/string_h.m4 \ + 
$(top_srcdir)/m4/strverscmp.m4 \ + $(top_srcdir)/m4/sys_socket_h.m4 \ + $(top_srcdir)/m4/sys_stat_h.m4 $(top_srcdir)/m4/sys_time_h.m4 \ + $(top_srcdir)/m4/sys_types_h.m4 $(top_srcdir)/m4/tempname.m4 \ + $(top_srcdir)/m4/threadlib.m4 $(top_srcdir)/m4/time_h.m4 \ + $(top_srcdir)/m4/unistd_h.m4 $(top_srcdir)/m4/unlocked-io.m4 \ + $(top_srcdir)/m4/version-etc.m4 \ + $(top_srcdir)/m4/warn-on-use.m4 $(top_srcdir)/m4/wchar_h.m4 \ + $(top_srcdir)/m4/wchar_t.m4 $(top_srcdir)/m4/wcrtomb.m4 \ + $(top_srcdir)/m4/wctob.m4 $(top_srcdir)/m4/wctomb.m4 \ + $(top_srcdir)/m4/wctype_h.m4 $(top_srcdir)/m4/wint_t.m4 \ + $(top_srcdir)/m4/xalloc.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +SCRIPTS = $(dist_noinst_SCRIPTS) +SOURCES = +DIST_SOURCES = +INFO_DEPS = $(srcdir)/sed.info +TEXINFO_TEX = $(top_srcdir)/build-aux/texinfo.tex +am__TEXINFO_TEX_DIR = $(top_srcdir)/build-aux +DVIS = sed.dvi +PDFS = sed.pdf +PSS = sed.ps +HTMLS = sed.html +TEXINFOS = sed.texi +TEXI2PDF = $(TEXI2DVI) --pdf --batch +MAKEINFOHTML = $(MAKEINFO) --html +DVIPS = dvips +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__installdirs = "$(DESTDIR)$(infodir)" "$(DESTDIR)$(man1dir)" +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| 
$$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +man1dir = $(mandir)/man1 +NROFF = nroff +MANS = $(dist_man_MANS) +DATA = $(dist_noinst_DATA) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkglibexecdir = @pkglibexecdir@ +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +ALLOCA_H = @ALLOCA_H@ +AMTAR = @AMTAR@ +APPLE_UNIVERSAL_BUILD = @APPLE_UNIVERSAL_BUILD@ +AR = @AR@ +ARFLAGS = @ARFLAGS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BITSIZEOF_PTRDIFF_T = @BITSIZEOF_PTRDIFF_T@ +BITSIZEOF_SIG_ATOMIC_T = @BITSIZEOF_SIG_ATOMIC_T@ +BITSIZEOF_SIZE_T = @BITSIZEOF_SIZE_T@ +BITSIZEOF_WCHAR_T = @BITSIZEOF_WCHAR_T@ +BITSIZEOF_WINT_T = @BITSIZEOF_WINT_T@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +COPYRIGHT_YEAR = @COPYRIGHT_YEAR@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EMULTIHOP_HIDDEN = @EMULTIHOP_HIDDEN@ +EMULTIHOP_VALUE = @EMULTIHOP_VALUE@ +ENOLINK_HIDDEN = @ENOLINK_HIDDEN@ +ENOLINK_VALUE = @ENOLINK_VALUE@ +EOVERFLOW_HIDDEN = @EOVERFLOW_HIDDEN@ +EOVERFLOW_VALUE = @EOVERFLOW_VALUE@ +ERRNO_H = @ERRNO_H@ +EXEEXT = @EXEEXT@ +GETOPT_H = @GETOPT_H@ +GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@ +GLIBC21 = @GLIBC21@ +GMSGFMT = @GMSGFMT@ +GMSGFMT_015 = @GMSGFMT_015@ +GNULIB_ATOLL = @GNULIB_ATOLL@ +GNULIB_BTOWC 
= @GNULIB_BTOWC@ +GNULIB_CALLOC_POSIX = @GNULIB_CALLOC_POSIX@ +GNULIB_CANONICALIZE_FILE_NAME = @GNULIB_CANONICALIZE_FILE_NAME@ +GNULIB_CHDIR = @GNULIB_CHDIR@ +GNULIB_CHOWN = @GNULIB_CHOWN@ +GNULIB_CLOSE = @GNULIB_CLOSE@ +GNULIB_DPRINTF = @GNULIB_DPRINTF@ +GNULIB_DUP = @GNULIB_DUP@ +GNULIB_DUP2 = @GNULIB_DUP2@ +GNULIB_DUP3 = @GNULIB_DUP3@ +GNULIB_DUPLOCALE = @GNULIB_DUPLOCALE@ +GNULIB_ENVIRON = @GNULIB_ENVIRON@ +GNULIB_EUIDACCESS = @GNULIB_EUIDACCESS@ +GNULIB_FACCESSAT = @GNULIB_FACCESSAT@ +GNULIB_FCHDIR = @GNULIB_FCHDIR@ +GNULIB_FCHMODAT = @GNULIB_FCHMODAT@ +GNULIB_FCHOWNAT = @GNULIB_FCHOWNAT@ +GNULIB_FCLOSE = @GNULIB_FCLOSE@ +GNULIB_FCNTL = @GNULIB_FCNTL@ +GNULIB_FDATASYNC = @GNULIB_FDATASYNC@ +GNULIB_FDOPEN = @GNULIB_FDOPEN@ +GNULIB_FFLUSH = @GNULIB_FFLUSH@ +GNULIB_FFSL = @GNULIB_FFSL@ +GNULIB_FFSLL = @GNULIB_FFSLL@ +GNULIB_FGETC = @GNULIB_FGETC@ +GNULIB_FGETS = @GNULIB_FGETS@ +GNULIB_FOPEN = @GNULIB_FOPEN@ +GNULIB_FPRINTF = @GNULIB_FPRINTF@ +GNULIB_FPRINTF_POSIX = @GNULIB_FPRINTF_POSIX@ +GNULIB_FPURGE = @GNULIB_FPURGE@ +GNULIB_FPUTC = @GNULIB_FPUTC@ +GNULIB_FPUTS = @GNULIB_FPUTS@ +GNULIB_FREAD = @GNULIB_FREAD@ +GNULIB_FREOPEN = @GNULIB_FREOPEN@ +GNULIB_FSCANF = @GNULIB_FSCANF@ +GNULIB_FSEEK = @GNULIB_FSEEK@ +GNULIB_FSEEKO = @GNULIB_FSEEKO@ +GNULIB_FSTAT = @GNULIB_FSTAT@ +GNULIB_FSTATAT = @GNULIB_FSTATAT@ +GNULIB_FSYNC = @GNULIB_FSYNC@ +GNULIB_FTELL = @GNULIB_FTELL@ +GNULIB_FTELLO = @GNULIB_FTELLO@ +GNULIB_FTRUNCATE = @GNULIB_FTRUNCATE@ +GNULIB_FUTIMENS = @GNULIB_FUTIMENS@ +GNULIB_FWRITE = @GNULIB_FWRITE@ +GNULIB_GETC = @GNULIB_GETC@ +GNULIB_GETCHAR = @GNULIB_GETCHAR@ +GNULIB_GETCWD = @GNULIB_GETCWD@ +GNULIB_GETDELIM = @GNULIB_GETDELIM@ +GNULIB_GETDOMAINNAME = @GNULIB_GETDOMAINNAME@ +GNULIB_GETDTABLESIZE = @GNULIB_GETDTABLESIZE@ +GNULIB_GETGROUPS = @GNULIB_GETGROUPS@ +GNULIB_GETHOSTNAME = @GNULIB_GETHOSTNAME@ +GNULIB_GETLINE = @GNULIB_GETLINE@ +GNULIB_GETLOADAVG = @GNULIB_GETLOADAVG@ +GNULIB_GETLOGIN = @GNULIB_GETLOGIN@ +GNULIB_GETLOGIN_R = @GNULIB_GETLOGIN_R@ 
+GNULIB_GETPAGESIZE = @GNULIB_GETPAGESIZE@ +GNULIB_GETSUBOPT = @GNULIB_GETSUBOPT@ +GNULIB_GETTIMEOFDAY = @GNULIB_GETTIMEOFDAY@ +GNULIB_GETUSERSHELL = @GNULIB_GETUSERSHELL@ +GNULIB_GL_UNISTD_H_GETOPT = @GNULIB_GL_UNISTD_H_GETOPT@ +GNULIB_GRANTPT = @GNULIB_GRANTPT@ +GNULIB_GROUP_MEMBER = @GNULIB_GROUP_MEMBER@ +GNULIB_ISATTY = @GNULIB_ISATTY@ +GNULIB_ISWBLANK = @GNULIB_ISWBLANK@ +GNULIB_ISWCTYPE = @GNULIB_ISWCTYPE@ +GNULIB_LCHMOD = @GNULIB_LCHMOD@ +GNULIB_LCHOWN = @GNULIB_LCHOWN@ +GNULIB_LINK = @GNULIB_LINK@ +GNULIB_LINKAT = @GNULIB_LINKAT@ +GNULIB_LOCALECONV = @GNULIB_LOCALECONV@ +GNULIB_LSEEK = @GNULIB_LSEEK@ +GNULIB_LSTAT = @GNULIB_LSTAT@ +GNULIB_MALLOC_POSIX = @GNULIB_MALLOC_POSIX@ +GNULIB_MBRLEN = @GNULIB_MBRLEN@ +GNULIB_MBRTOWC = @GNULIB_MBRTOWC@ +GNULIB_MBSCASECMP = @GNULIB_MBSCASECMP@ +GNULIB_MBSCASESTR = @GNULIB_MBSCASESTR@ +GNULIB_MBSCHR = @GNULIB_MBSCHR@ +GNULIB_MBSCSPN = @GNULIB_MBSCSPN@ +GNULIB_MBSINIT = @GNULIB_MBSINIT@ +GNULIB_MBSLEN = @GNULIB_MBSLEN@ +GNULIB_MBSNCASECMP = @GNULIB_MBSNCASECMP@ +GNULIB_MBSNLEN = @GNULIB_MBSNLEN@ +GNULIB_MBSNRTOWCS = @GNULIB_MBSNRTOWCS@ +GNULIB_MBSPBRK = @GNULIB_MBSPBRK@ +GNULIB_MBSPCASECMP = @GNULIB_MBSPCASECMP@ +GNULIB_MBSRCHR = @GNULIB_MBSRCHR@ +GNULIB_MBSRTOWCS = @GNULIB_MBSRTOWCS@ +GNULIB_MBSSEP = @GNULIB_MBSSEP@ +GNULIB_MBSSPN = @GNULIB_MBSSPN@ +GNULIB_MBSSTR = @GNULIB_MBSSTR@ +GNULIB_MBSTOK_R = @GNULIB_MBSTOK_R@ +GNULIB_MBTOWC = @GNULIB_MBTOWC@ +GNULIB_MEMCHR = @GNULIB_MEMCHR@ +GNULIB_MEMMEM = @GNULIB_MEMMEM@ +GNULIB_MEMPCPY = @GNULIB_MEMPCPY@ +GNULIB_MEMRCHR = @GNULIB_MEMRCHR@ +GNULIB_MKDIRAT = @GNULIB_MKDIRAT@ +GNULIB_MKDTEMP = @GNULIB_MKDTEMP@ +GNULIB_MKFIFO = @GNULIB_MKFIFO@ +GNULIB_MKFIFOAT = @GNULIB_MKFIFOAT@ +GNULIB_MKNOD = @GNULIB_MKNOD@ +GNULIB_MKNODAT = @GNULIB_MKNODAT@ +GNULIB_MKOSTEMP = @GNULIB_MKOSTEMP@ +GNULIB_MKOSTEMPS = @GNULIB_MKOSTEMPS@ +GNULIB_MKSTEMP = @GNULIB_MKSTEMP@ +GNULIB_MKSTEMPS = @GNULIB_MKSTEMPS@ +GNULIB_MKTIME = @GNULIB_MKTIME@ +GNULIB_NANOSLEEP = @GNULIB_NANOSLEEP@ +GNULIB_NL_LANGINFO 
= @GNULIB_NL_LANGINFO@ +GNULIB_NONBLOCKING = @GNULIB_NONBLOCKING@ +GNULIB_OBSTACK_PRINTF = @GNULIB_OBSTACK_PRINTF@ +GNULIB_OBSTACK_PRINTF_POSIX = @GNULIB_OBSTACK_PRINTF_POSIX@ +GNULIB_OPEN = @GNULIB_OPEN@ +GNULIB_OPENAT = @GNULIB_OPENAT@ +GNULIB_PCLOSE = @GNULIB_PCLOSE@ +GNULIB_PERROR = @GNULIB_PERROR@ +GNULIB_PIPE = @GNULIB_PIPE@ +GNULIB_PIPE2 = @GNULIB_PIPE2@ +GNULIB_POPEN = @GNULIB_POPEN@ +GNULIB_POSIX_OPENPT = @GNULIB_POSIX_OPENPT@ +GNULIB_PREAD = @GNULIB_PREAD@ +GNULIB_PRINTF = @GNULIB_PRINTF@ +GNULIB_PRINTF_POSIX = @GNULIB_PRINTF_POSIX@ +GNULIB_PTSNAME = @GNULIB_PTSNAME@ +GNULIB_PTSNAME_R = @GNULIB_PTSNAME_R@ +GNULIB_PUTC = @GNULIB_PUTC@ +GNULIB_PUTCHAR = @GNULIB_PUTCHAR@ +GNULIB_PUTENV = @GNULIB_PUTENV@ +GNULIB_PUTS = @GNULIB_PUTS@ +GNULIB_PWRITE = @GNULIB_PWRITE@ +GNULIB_RANDOM = @GNULIB_RANDOM@ +GNULIB_RANDOM_R = @GNULIB_RANDOM_R@ +GNULIB_RAWMEMCHR = @GNULIB_RAWMEMCHR@ +GNULIB_READ = @GNULIB_READ@ +GNULIB_READLINK = @GNULIB_READLINK@ +GNULIB_READLINKAT = @GNULIB_READLINKAT@ +GNULIB_REALLOC_POSIX = @GNULIB_REALLOC_POSIX@ +GNULIB_REALPATH = @GNULIB_REALPATH@ +GNULIB_REMOVE = @GNULIB_REMOVE@ +GNULIB_RENAME = @GNULIB_RENAME@ +GNULIB_RENAMEAT = @GNULIB_RENAMEAT@ +GNULIB_RMDIR = @GNULIB_RMDIR@ +GNULIB_RPMATCH = @GNULIB_RPMATCH@ +GNULIB_SCANF = @GNULIB_SCANF@ +GNULIB_SETENV = @GNULIB_SETENV@ +GNULIB_SETHOSTNAME = @GNULIB_SETHOSTNAME@ +GNULIB_SETLOCALE = @GNULIB_SETLOCALE@ +GNULIB_SLEEP = @GNULIB_SLEEP@ +GNULIB_SNPRINTF = @GNULIB_SNPRINTF@ +GNULIB_SPRINTF_POSIX = @GNULIB_SPRINTF_POSIX@ +GNULIB_STAT = @GNULIB_STAT@ +GNULIB_STDIO_H_NONBLOCKING = @GNULIB_STDIO_H_NONBLOCKING@ +GNULIB_STDIO_H_SIGPIPE = @GNULIB_STDIO_H_SIGPIPE@ +GNULIB_STPCPY = @GNULIB_STPCPY@ +GNULIB_STPNCPY = @GNULIB_STPNCPY@ +GNULIB_STRCASESTR = @GNULIB_STRCASESTR@ +GNULIB_STRCHRNUL = @GNULIB_STRCHRNUL@ +GNULIB_STRDUP = @GNULIB_STRDUP@ +GNULIB_STRERROR = @GNULIB_STRERROR@ +GNULIB_STRERROR_R = @GNULIB_STRERROR_R@ +GNULIB_STRNCAT = @GNULIB_STRNCAT@ +GNULIB_STRNDUP = @GNULIB_STRNDUP@ +GNULIB_STRNLEN = 
@GNULIB_STRNLEN@ +GNULIB_STRPBRK = @GNULIB_STRPBRK@ +GNULIB_STRPTIME = @GNULIB_STRPTIME@ +GNULIB_STRSEP = @GNULIB_STRSEP@ +GNULIB_STRSIGNAL = @GNULIB_STRSIGNAL@ +GNULIB_STRSTR = @GNULIB_STRSTR@ +GNULIB_STRTOD = @GNULIB_STRTOD@ +GNULIB_STRTOK_R = @GNULIB_STRTOK_R@ +GNULIB_STRTOLL = @GNULIB_STRTOLL@ +GNULIB_STRTOULL = @GNULIB_STRTOULL@ +GNULIB_STRVERSCMP = @GNULIB_STRVERSCMP@ +GNULIB_SYMLINK = @GNULIB_SYMLINK@ +GNULIB_SYMLINKAT = @GNULIB_SYMLINKAT@ +GNULIB_SYSTEM_POSIX = @GNULIB_SYSTEM_POSIX@ +GNULIB_TIMEGM = @GNULIB_TIMEGM@ +GNULIB_TIME_R = @GNULIB_TIME_R@ +GNULIB_TMPFILE = @GNULIB_TMPFILE@ +GNULIB_TOWCTRANS = @GNULIB_TOWCTRANS@ +GNULIB_TTYNAME_R = @GNULIB_TTYNAME_R@ +GNULIB_UNISTD_H_NONBLOCKING = @GNULIB_UNISTD_H_NONBLOCKING@ +GNULIB_UNISTD_H_SIGPIPE = @GNULIB_UNISTD_H_SIGPIPE@ +GNULIB_UNLINK = @GNULIB_UNLINK@ +GNULIB_UNLINKAT = @GNULIB_UNLINKAT@ +GNULIB_UNLOCKPT = @GNULIB_UNLOCKPT@ +GNULIB_UNSETENV = @GNULIB_UNSETENV@ +GNULIB_USLEEP = @GNULIB_USLEEP@ +GNULIB_UTIMENSAT = @GNULIB_UTIMENSAT@ +GNULIB_VASPRINTF = @GNULIB_VASPRINTF@ +GNULIB_VDPRINTF = @GNULIB_VDPRINTF@ +GNULIB_VFPRINTF = @GNULIB_VFPRINTF@ +GNULIB_VFPRINTF_POSIX = @GNULIB_VFPRINTF_POSIX@ +GNULIB_VFSCANF = @GNULIB_VFSCANF@ +GNULIB_VPRINTF = @GNULIB_VPRINTF@ +GNULIB_VPRINTF_POSIX = @GNULIB_VPRINTF_POSIX@ +GNULIB_VSCANF = @GNULIB_VSCANF@ +GNULIB_VSNPRINTF = @GNULIB_VSNPRINTF@ +GNULIB_VSPRINTF_POSIX = @GNULIB_VSPRINTF_POSIX@ +GNULIB_WCPCPY = @GNULIB_WCPCPY@ +GNULIB_WCPNCPY = @GNULIB_WCPNCPY@ +GNULIB_WCRTOMB = @GNULIB_WCRTOMB@ +GNULIB_WCSCASECMP = @GNULIB_WCSCASECMP@ +GNULIB_WCSCAT = @GNULIB_WCSCAT@ +GNULIB_WCSCHR = @GNULIB_WCSCHR@ +GNULIB_WCSCMP = @GNULIB_WCSCMP@ +GNULIB_WCSCOLL = @GNULIB_WCSCOLL@ +GNULIB_WCSCPY = @GNULIB_WCSCPY@ +GNULIB_WCSCSPN = @GNULIB_WCSCSPN@ +GNULIB_WCSDUP = @GNULIB_WCSDUP@ +GNULIB_WCSLEN = @GNULIB_WCSLEN@ +GNULIB_WCSNCASECMP = @GNULIB_WCSNCASECMP@ +GNULIB_WCSNCAT = @GNULIB_WCSNCAT@ +GNULIB_WCSNCMP = @GNULIB_WCSNCMP@ +GNULIB_WCSNCPY = @GNULIB_WCSNCPY@ +GNULIB_WCSNLEN = @GNULIB_WCSNLEN@ 
+GNULIB_WCSNRTOMBS = @GNULIB_WCSNRTOMBS@ +GNULIB_WCSPBRK = @GNULIB_WCSPBRK@ +GNULIB_WCSRCHR = @GNULIB_WCSRCHR@ +GNULIB_WCSRTOMBS = @GNULIB_WCSRTOMBS@ +GNULIB_WCSSPN = @GNULIB_WCSSPN@ +GNULIB_WCSSTR = @GNULIB_WCSSTR@ +GNULIB_WCSTOK = @GNULIB_WCSTOK@ +GNULIB_WCSWIDTH = @GNULIB_WCSWIDTH@ +GNULIB_WCSXFRM = @GNULIB_WCSXFRM@ +GNULIB_WCTOB = @GNULIB_WCTOB@ +GNULIB_WCTOMB = @GNULIB_WCTOMB@ +GNULIB_WCTRANS = @GNULIB_WCTRANS@ +GNULIB_WCTYPE = @GNULIB_WCTYPE@ +GNULIB_WCWIDTH = @GNULIB_WCWIDTH@ +GNULIB_WMEMCHR = @GNULIB_WMEMCHR@ +GNULIB_WMEMCMP = @GNULIB_WMEMCMP@ +GNULIB_WMEMCPY = @GNULIB_WMEMCPY@ +GNULIB_WMEMMOVE = @GNULIB_WMEMMOVE@ +GNULIB_WMEMSET = @GNULIB_WMEMSET@ +GNULIB_WRITE = @GNULIB_WRITE@ +GNULIB__EXIT = @GNULIB__EXIT@ +GREP = @GREP@ +HAVE_ATOLL = @HAVE_ATOLL@ +HAVE_BTOWC = @HAVE_BTOWC@ +HAVE_CANONICALIZE_FILE_NAME = @HAVE_CANONICALIZE_FILE_NAME@ +HAVE_CHOWN = @HAVE_CHOWN@ +HAVE_DECL_ENVIRON = @HAVE_DECL_ENVIRON@ +HAVE_DECL_FCHDIR = @HAVE_DECL_FCHDIR@ +HAVE_DECL_FDATASYNC = @HAVE_DECL_FDATASYNC@ +HAVE_DECL_FPURGE = @HAVE_DECL_FPURGE@ +HAVE_DECL_FSEEKO = @HAVE_DECL_FSEEKO@ +HAVE_DECL_FTELLO = @HAVE_DECL_FTELLO@ +HAVE_DECL_GETDELIM = @HAVE_DECL_GETDELIM@ +HAVE_DECL_GETDOMAINNAME = @HAVE_DECL_GETDOMAINNAME@ +HAVE_DECL_GETLINE = @HAVE_DECL_GETLINE@ +HAVE_DECL_GETLOADAVG = @HAVE_DECL_GETLOADAVG@ +HAVE_DECL_GETLOGIN_R = @HAVE_DECL_GETLOGIN_R@ +HAVE_DECL_GETPAGESIZE = @HAVE_DECL_GETPAGESIZE@ +HAVE_DECL_GETUSERSHELL = @HAVE_DECL_GETUSERSHELL@ +HAVE_DECL_LOCALTIME_R = @HAVE_DECL_LOCALTIME_R@ +HAVE_DECL_MEMMEM = @HAVE_DECL_MEMMEM@ +HAVE_DECL_MEMRCHR = @HAVE_DECL_MEMRCHR@ +HAVE_DECL_OBSTACK_PRINTF = @HAVE_DECL_OBSTACK_PRINTF@ +HAVE_DECL_SETENV = @HAVE_DECL_SETENV@ +HAVE_DECL_SETHOSTNAME = @HAVE_DECL_SETHOSTNAME@ +HAVE_DECL_SNPRINTF = @HAVE_DECL_SNPRINTF@ +HAVE_DECL_STRDUP = @HAVE_DECL_STRDUP@ +HAVE_DECL_STRERROR_R = @HAVE_DECL_STRERROR_R@ +HAVE_DECL_STRNDUP = @HAVE_DECL_STRNDUP@ +HAVE_DECL_STRNLEN = @HAVE_DECL_STRNLEN@ +HAVE_DECL_STRSIGNAL = @HAVE_DECL_STRSIGNAL@ 
+HAVE_DECL_STRTOK_R = @HAVE_DECL_STRTOK_R@ +HAVE_DECL_TTYNAME_R = @HAVE_DECL_TTYNAME_R@ +HAVE_DECL_UNSETENV = @HAVE_DECL_UNSETENV@ +HAVE_DECL_VSNPRINTF = @HAVE_DECL_VSNPRINTF@ +HAVE_DECL_WCTOB = @HAVE_DECL_WCTOB@ +HAVE_DECL_WCWIDTH = @HAVE_DECL_WCWIDTH@ +HAVE_DPRINTF = @HAVE_DPRINTF@ +HAVE_DUP2 = @HAVE_DUP2@ +HAVE_DUP3 = @HAVE_DUP3@ +HAVE_DUPLOCALE = @HAVE_DUPLOCALE@ +HAVE_EUIDACCESS = @HAVE_EUIDACCESS@ +HAVE_FACCESSAT = @HAVE_FACCESSAT@ +HAVE_FCHDIR = @HAVE_FCHDIR@ +HAVE_FCHMODAT = @HAVE_FCHMODAT@ +HAVE_FCHOWNAT = @HAVE_FCHOWNAT@ +HAVE_FCNTL = @HAVE_FCNTL@ +HAVE_FDATASYNC = @HAVE_FDATASYNC@ +HAVE_FEATURES_H = @HAVE_FEATURES_H@ +HAVE_FFSL = @HAVE_FFSL@ +HAVE_FFSLL = @HAVE_FFSLL@ +HAVE_FSEEKO = @HAVE_FSEEKO@ +HAVE_FSTATAT = @HAVE_FSTATAT@ +HAVE_FSYNC = @HAVE_FSYNC@ +HAVE_FTELLO = @HAVE_FTELLO@ +HAVE_FTRUNCATE = @HAVE_FTRUNCATE@ +HAVE_FUTIMENS = @HAVE_FUTIMENS@ +HAVE_GETDTABLESIZE = @HAVE_GETDTABLESIZE@ +HAVE_GETGROUPS = @HAVE_GETGROUPS@ +HAVE_GETHOSTNAME = @HAVE_GETHOSTNAME@ +HAVE_GETLOGIN = @HAVE_GETLOGIN@ +HAVE_GETOPT_H = @HAVE_GETOPT_H@ +HAVE_GETPAGESIZE = @HAVE_GETPAGESIZE@ +HAVE_GETSUBOPT = @HAVE_GETSUBOPT@ +HAVE_GETTIMEOFDAY = @HAVE_GETTIMEOFDAY@ +HAVE_GRANTPT = @HAVE_GRANTPT@ +HAVE_GROUP_MEMBER = @HAVE_GROUP_MEMBER@ +HAVE_INTTYPES_H = @HAVE_INTTYPES_H@ +HAVE_ISWBLANK = @HAVE_ISWBLANK@ +HAVE_ISWCNTRL = @HAVE_ISWCNTRL@ +HAVE_LANGINFO_CODESET = @HAVE_LANGINFO_CODESET@ +HAVE_LANGINFO_ERA = @HAVE_LANGINFO_ERA@ +HAVE_LANGINFO_H = @HAVE_LANGINFO_H@ +HAVE_LANGINFO_T_FMT_AMPM = @HAVE_LANGINFO_T_FMT_AMPM@ +HAVE_LANGINFO_YESEXPR = @HAVE_LANGINFO_YESEXPR@ +HAVE_LCHMOD = @HAVE_LCHMOD@ +HAVE_LCHOWN = @HAVE_LCHOWN@ +HAVE_LINK = @HAVE_LINK@ +HAVE_LINKAT = @HAVE_LINKAT@ +HAVE_LONG_LONG_INT = @HAVE_LONG_LONG_INT@ +HAVE_LSTAT = @HAVE_LSTAT@ +HAVE_MBRLEN = @HAVE_MBRLEN@ +HAVE_MBRTOWC = @HAVE_MBRTOWC@ +HAVE_MBSINIT = @HAVE_MBSINIT@ +HAVE_MBSLEN = @HAVE_MBSLEN@ +HAVE_MBSNRTOWCS = @HAVE_MBSNRTOWCS@ +HAVE_MBSRTOWCS = @HAVE_MBSRTOWCS@ +HAVE_MEMCHR = @HAVE_MEMCHR@ +HAVE_MEMPCPY = 
@HAVE_MEMPCPY@ +HAVE_MKDIRAT = @HAVE_MKDIRAT@ +HAVE_MKDTEMP = @HAVE_MKDTEMP@ +HAVE_MKFIFO = @HAVE_MKFIFO@ +HAVE_MKFIFOAT = @HAVE_MKFIFOAT@ +HAVE_MKNOD = @HAVE_MKNOD@ +HAVE_MKNODAT = @HAVE_MKNODAT@ +HAVE_MKOSTEMP = @HAVE_MKOSTEMP@ +HAVE_MKOSTEMPS = @HAVE_MKOSTEMPS@ +HAVE_MKSTEMP = @HAVE_MKSTEMP@ +HAVE_MKSTEMPS = @HAVE_MKSTEMPS@ +HAVE_MSVC_INVALID_PARAMETER_HANDLER = @HAVE_MSVC_INVALID_PARAMETER_HANDLER@ +HAVE_NANOSLEEP = @HAVE_NANOSLEEP@ +HAVE_NL_LANGINFO = @HAVE_NL_LANGINFO@ +HAVE_OPENAT = @HAVE_OPENAT@ +HAVE_OS_H = @HAVE_OS_H@ +HAVE_PCLOSE = @HAVE_PCLOSE@ +HAVE_PIPE = @HAVE_PIPE@ +HAVE_PIPE2 = @HAVE_PIPE2@ +HAVE_POPEN = @HAVE_POPEN@ +HAVE_POSIX_OPENPT = @HAVE_POSIX_OPENPT@ +HAVE_PREAD = @HAVE_PREAD@ +HAVE_PTSNAME = @HAVE_PTSNAME@ +HAVE_PTSNAME_R = @HAVE_PTSNAME_R@ +HAVE_PWRITE = @HAVE_PWRITE@ +HAVE_RANDOM = @HAVE_RANDOM@ +HAVE_RANDOM_H = @HAVE_RANDOM_H@ +HAVE_RANDOM_R = @HAVE_RANDOM_R@ +HAVE_RAWMEMCHR = @HAVE_RAWMEMCHR@ +HAVE_READLINK = @HAVE_READLINK@ +HAVE_READLINKAT = @HAVE_READLINKAT@ +HAVE_REALPATH = @HAVE_REALPATH@ +HAVE_RENAMEAT = @HAVE_RENAMEAT@ +HAVE_RPMATCH = @HAVE_RPMATCH@ +HAVE_SETENV = @HAVE_SETENV@ +HAVE_SETHOSTNAME = @HAVE_SETHOSTNAME@ +HAVE_SIGNED_SIG_ATOMIC_T = @HAVE_SIGNED_SIG_ATOMIC_T@ +HAVE_SIGNED_WCHAR_T = @HAVE_SIGNED_WCHAR_T@ +HAVE_SIGNED_WINT_T = @HAVE_SIGNED_WINT_T@ +HAVE_SLEEP = @HAVE_SLEEP@ +HAVE_STDINT_H = @HAVE_STDINT_H@ +HAVE_STPCPY = @HAVE_STPCPY@ +HAVE_STPNCPY = @HAVE_STPNCPY@ +HAVE_STRCASESTR = @HAVE_STRCASESTR@ +HAVE_STRCHRNUL = @HAVE_STRCHRNUL@ +HAVE_STRPBRK = @HAVE_STRPBRK@ +HAVE_STRPTIME = @HAVE_STRPTIME@ +HAVE_STRSEP = @HAVE_STRSEP@ +HAVE_STRTOD = @HAVE_STRTOD@ +HAVE_STRTOLL = @HAVE_STRTOLL@ +HAVE_STRTOULL = @HAVE_STRTOULL@ +HAVE_STRUCT_RANDOM_DATA = @HAVE_STRUCT_RANDOM_DATA@ +HAVE_STRUCT_TIMEVAL = @HAVE_STRUCT_TIMEVAL@ +HAVE_STRVERSCMP = @HAVE_STRVERSCMP@ +HAVE_SYMLINK = @HAVE_SYMLINK@ +HAVE_SYMLINKAT = @HAVE_SYMLINKAT@ +HAVE_SYS_BITYPES_H = @HAVE_SYS_BITYPES_H@ +HAVE_SYS_INTTYPES_H = @HAVE_SYS_INTTYPES_H@ +HAVE_SYS_LOADAVG_H 
= @HAVE_SYS_LOADAVG_H@ +HAVE_SYS_PARAM_H = @HAVE_SYS_PARAM_H@ +HAVE_SYS_TIME_H = @HAVE_SYS_TIME_H@ +HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@ +HAVE_TIMEGM = @HAVE_TIMEGM@ +HAVE_UNISTD_H = @HAVE_UNISTD_H@ +HAVE_UNLINKAT = @HAVE_UNLINKAT@ +HAVE_UNLOCKPT = @HAVE_UNLOCKPT@ +HAVE_UNSIGNED_LONG_LONG_INT = @HAVE_UNSIGNED_LONG_LONG_INT@ +HAVE_USLEEP = @HAVE_USLEEP@ +HAVE_UTIMENSAT = @HAVE_UTIMENSAT@ +HAVE_VASPRINTF = @HAVE_VASPRINTF@ +HAVE_VDPRINTF = @HAVE_VDPRINTF@ +HAVE_WCHAR_H = @HAVE_WCHAR_H@ +HAVE_WCHAR_T = @HAVE_WCHAR_T@ +HAVE_WCPCPY = @HAVE_WCPCPY@ +HAVE_WCPNCPY = @HAVE_WCPNCPY@ +HAVE_WCRTOMB = @HAVE_WCRTOMB@ +HAVE_WCSCASECMP = @HAVE_WCSCASECMP@ +HAVE_WCSCAT = @HAVE_WCSCAT@ +HAVE_WCSCHR = @HAVE_WCSCHR@ +HAVE_WCSCMP = @HAVE_WCSCMP@ +HAVE_WCSCOLL = @HAVE_WCSCOLL@ +HAVE_WCSCPY = @HAVE_WCSCPY@ +HAVE_WCSCSPN = @HAVE_WCSCSPN@ +HAVE_WCSDUP = @HAVE_WCSDUP@ +HAVE_WCSLEN = @HAVE_WCSLEN@ +HAVE_WCSNCASECMP = @HAVE_WCSNCASECMP@ +HAVE_WCSNCAT = @HAVE_WCSNCAT@ +HAVE_WCSNCMP = @HAVE_WCSNCMP@ +HAVE_WCSNCPY = @HAVE_WCSNCPY@ +HAVE_WCSNLEN = @HAVE_WCSNLEN@ +HAVE_WCSNRTOMBS = @HAVE_WCSNRTOMBS@ +HAVE_WCSPBRK = @HAVE_WCSPBRK@ +HAVE_WCSRCHR = @HAVE_WCSRCHR@ +HAVE_WCSRTOMBS = @HAVE_WCSRTOMBS@ +HAVE_WCSSPN = @HAVE_WCSSPN@ +HAVE_WCSSTR = @HAVE_WCSSTR@ +HAVE_WCSTOK = @HAVE_WCSTOK@ +HAVE_WCSWIDTH = @HAVE_WCSWIDTH@ +HAVE_WCSXFRM = @HAVE_WCSXFRM@ +HAVE_WCTRANS_T = @HAVE_WCTRANS_T@ +HAVE_WCTYPE_H = @HAVE_WCTYPE_H@ +HAVE_WCTYPE_T = @HAVE_WCTYPE_T@ +HAVE_WINSOCK2_H = @HAVE_WINSOCK2_H@ +HAVE_WINT_T = @HAVE_WINT_T@ +HAVE_WMEMCHR = @HAVE_WMEMCHR@ +HAVE_WMEMCMP = @HAVE_WMEMCMP@ +HAVE_WMEMCPY = @HAVE_WMEMCPY@ +HAVE_WMEMMOVE = @HAVE_WMEMMOVE@ +HAVE_WMEMSET = @HAVE_WMEMSET@ +HAVE_XLOCALE_H = @HAVE_XLOCALE_H@ +HAVE__BOOL = @HAVE__BOOL@ +HAVE__EXIT = @HAVE__EXIT@ +INCLUDE_NEXT = @INCLUDE_NEXT@ +INCLUDE_NEXT_AS_FIRST_DIRECTIVE = @INCLUDE_NEXT_AS_FIRST_DIRECTIVE@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = 
@INSTALL_STRIP_PROGRAM@ +INTLLIBS = @INTLLIBS@ +INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@ +LDFLAGS = @LDFLAGS@ +LIBICONV = @LIBICONV@ +LIBINTL = @LIBINTL@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSED_LIBDEPS = @LIBSED_LIBDEPS@ +LIBSED_LTLIBDEPS = @LIBSED_LTLIBDEPS@ +LIB_ACL = @LIB_ACL@ +LIB_SELINUX = @LIB_SELINUX@ +LOCALCHARSET_TESTS_ENVIRONMENT = @LOCALCHARSET_TESTS_ENVIRONMENT@ +LOCALE_FR = @LOCALE_FR@ +LOCALE_FR_UTF8 = @LOCALE_FR_UTF8@ +LOCALE_JA = @LOCALE_JA@ +LOCALE_ZH_CN = @LOCALE_ZH_CN@ +LTLIBICONV = @LTLIBICONV@ +LTLIBINTL = @LTLIBINTL@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MSGFMT = @MSGFMT@ +MSGFMT_015 = @MSGFMT_015@ +MSGMERGE = @MSGMERGE@ +NEXT_AS_FIRST_DIRECTIVE_ERRNO_H = @NEXT_AS_FIRST_DIRECTIVE_ERRNO_H@ +NEXT_AS_FIRST_DIRECTIVE_FCNTL_H = @NEXT_AS_FIRST_DIRECTIVE_FCNTL_H@ +NEXT_AS_FIRST_DIRECTIVE_GETOPT_H = @NEXT_AS_FIRST_DIRECTIVE_GETOPT_H@ +NEXT_AS_FIRST_DIRECTIVE_LANGINFO_H = @NEXT_AS_FIRST_DIRECTIVE_LANGINFO_H@ +NEXT_AS_FIRST_DIRECTIVE_LOCALE_H = @NEXT_AS_FIRST_DIRECTIVE_LOCALE_H@ +NEXT_AS_FIRST_DIRECTIVE_SELINUX_SELINUX_H = @NEXT_AS_FIRST_DIRECTIVE_SELINUX_SELINUX_H@ +NEXT_AS_FIRST_DIRECTIVE_STDARG_H = @NEXT_AS_FIRST_DIRECTIVE_STDARG_H@ +NEXT_AS_FIRST_DIRECTIVE_STDDEF_H = @NEXT_AS_FIRST_DIRECTIVE_STDDEF_H@ +NEXT_AS_FIRST_DIRECTIVE_STDINT_H = @NEXT_AS_FIRST_DIRECTIVE_STDINT_H@ +NEXT_AS_FIRST_DIRECTIVE_STDIO_H = @NEXT_AS_FIRST_DIRECTIVE_STDIO_H@ +NEXT_AS_FIRST_DIRECTIVE_STDLIB_H = @NEXT_AS_FIRST_DIRECTIVE_STDLIB_H@ +NEXT_AS_FIRST_DIRECTIVE_STRING_H = @NEXT_AS_FIRST_DIRECTIVE_STRING_H@ +NEXT_AS_FIRST_DIRECTIVE_SYS_STAT_H = @NEXT_AS_FIRST_DIRECTIVE_SYS_STAT_H@ +NEXT_AS_FIRST_DIRECTIVE_SYS_TIME_H = @NEXT_AS_FIRST_DIRECTIVE_SYS_TIME_H@ +NEXT_AS_FIRST_DIRECTIVE_SYS_TYPES_H = @NEXT_AS_FIRST_DIRECTIVE_SYS_TYPES_H@ +NEXT_AS_FIRST_DIRECTIVE_TIME_H = @NEXT_AS_FIRST_DIRECTIVE_TIME_H@ +NEXT_AS_FIRST_DIRECTIVE_UNISTD_H = @NEXT_AS_FIRST_DIRECTIVE_UNISTD_H@ +NEXT_AS_FIRST_DIRECTIVE_WCHAR_H = @NEXT_AS_FIRST_DIRECTIVE_WCHAR_H@ 
+NEXT_AS_FIRST_DIRECTIVE_WCTYPE_H = @NEXT_AS_FIRST_DIRECTIVE_WCTYPE_H@ +NEXT_ERRNO_H = @NEXT_ERRNO_H@ +NEXT_FCNTL_H = @NEXT_FCNTL_H@ +NEXT_GETOPT_H = @NEXT_GETOPT_H@ +NEXT_LANGINFO_H = @NEXT_LANGINFO_H@ +NEXT_LOCALE_H = @NEXT_LOCALE_H@ +NEXT_SELINUX_SELINUX_H = @NEXT_SELINUX_SELINUX_H@ +NEXT_STDARG_H = @NEXT_STDARG_H@ +NEXT_STDDEF_H = @NEXT_STDDEF_H@ +NEXT_STDINT_H = @NEXT_STDINT_H@ +NEXT_STDIO_H = @NEXT_STDIO_H@ +NEXT_STDLIB_H = @NEXT_STDLIB_H@ +NEXT_STRING_H = @NEXT_STRING_H@ +NEXT_SYS_STAT_H = @NEXT_SYS_STAT_H@ +NEXT_SYS_TIME_H = @NEXT_SYS_TIME_H@ +NEXT_SYS_TYPES_H = @NEXT_SYS_TYPES_H@ +NEXT_TIME_H = @NEXT_TIME_H@ +NEXT_UNISTD_H = @NEXT_UNISTD_H@ +NEXT_WCHAR_H = @NEXT_WCHAR_H@ +NEXT_WCTYPE_H = @NEXT_WCTYPE_H@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POSUB = @POSUB@ +PRAGMA_COLUMNS = @PRAGMA_COLUMNS@ +PRAGMA_SYSTEM_HEADER = @PRAGMA_SYSTEM_HEADER@ +PTHREAD_H_DEFINES_STRUCT_TIMESPEC = @PTHREAD_H_DEFINES_STRUCT_TIMESPEC@ +PTRDIFF_T_SUFFIX = @PTRDIFF_T_SUFFIX@ +RANLIB = @RANLIB@ +REPLACE_BTOWC = @REPLACE_BTOWC@ +REPLACE_CALLOC = @REPLACE_CALLOC@ +REPLACE_CANONICALIZE_FILE_NAME = @REPLACE_CANONICALIZE_FILE_NAME@ +REPLACE_CHOWN = @REPLACE_CHOWN@ +REPLACE_CLOSE = @REPLACE_CLOSE@ +REPLACE_DPRINTF = @REPLACE_DPRINTF@ +REPLACE_DUP = @REPLACE_DUP@ +REPLACE_DUP2 = @REPLACE_DUP2@ +REPLACE_DUPLOCALE = @REPLACE_DUPLOCALE@ +REPLACE_FCHOWNAT = @REPLACE_FCHOWNAT@ +REPLACE_FCLOSE = @REPLACE_FCLOSE@ +REPLACE_FCNTL = @REPLACE_FCNTL@ +REPLACE_FDOPEN = @REPLACE_FDOPEN@ +REPLACE_FFLUSH = @REPLACE_FFLUSH@ +REPLACE_FOPEN = @REPLACE_FOPEN@ +REPLACE_FPRINTF = @REPLACE_FPRINTF@ +REPLACE_FPURGE = @REPLACE_FPURGE@ +REPLACE_FREOPEN = @REPLACE_FREOPEN@ +REPLACE_FSEEK = @REPLACE_FSEEK@ +REPLACE_FSEEKO = @REPLACE_FSEEKO@ +REPLACE_FSTAT = @REPLACE_FSTAT@ 
+REPLACE_FSTATAT = @REPLACE_FSTATAT@ +REPLACE_FTELL = @REPLACE_FTELL@ +REPLACE_FTELLO = @REPLACE_FTELLO@ +REPLACE_FTRUNCATE = @REPLACE_FTRUNCATE@ +REPLACE_FUTIMENS = @REPLACE_FUTIMENS@ +REPLACE_GETCWD = @REPLACE_GETCWD@ +REPLACE_GETDELIM = @REPLACE_GETDELIM@ +REPLACE_GETDOMAINNAME = @REPLACE_GETDOMAINNAME@ +REPLACE_GETGROUPS = @REPLACE_GETGROUPS@ +REPLACE_GETLINE = @REPLACE_GETLINE@ +REPLACE_GETLOGIN_R = @REPLACE_GETLOGIN_R@ +REPLACE_GETPAGESIZE = @REPLACE_GETPAGESIZE@ +REPLACE_GETTIMEOFDAY = @REPLACE_GETTIMEOFDAY@ +REPLACE_ISATTY = @REPLACE_ISATTY@ +REPLACE_ISWBLANK = @REPLACE_ISWBLANK@ +REPLACE_ISWCNTRL = @REPLACE_ISWCNTRL@ +REPLACE_LCHOWN = @REPLACE_LCHOWN@ +REPLACE_LINK = @REPLACE_LINK@ +REPLACE_LINKAT = @REPLACE_LINKAT@ +REPLACE_LOCALECONV = @REPLACE_LOCALECONV@ +REPLACE_LOCALTIME_R = @REPLACE_LOCALTIME_R@ +REPLACE_LSEEK = @REPLACE_LSEEK@ +REPLACE_LSTAT = @REPLACE_LSTAT@ +REPLACE_MALLOC = @REPLACE_MALLOC@ +REPLACE_MBRLEN = @REPLACE_MBRLEN@ +REPLACE_MBRTOWC = @REPLACE_MBRTOWC@ +REPLACE_MBSINIT = @REPLACE_MBSINIT@ +REPLACE_MBSNRTOWCS = @REPLACE_MBSNRTOWCS@ +REPLACE_MBSRTOWCS = @REPLACE_MBSRTOWCS@ +REPLACE_MBSTATE_T = @REPLACE_MBSTATE_T@ +REPLACE_MBTOWC = @REPLACE_MBTOWC@ +REPLACE_MEMCHR = @REPLACE_MEMCHR@ +REPLACE_MEMMEM = @REPLACE_MEMMEM@ +REPLACE_MKDIR = @REPLACE_MKDIR@ +REPLACE_MKFIFO = @REPLACE_MKFIFO@ +REPLACE_MKNOD = @REPLACE_MKNOD@ +REPLACE_MKSTEMP = @REPLACE_MKSTEMP@ +REPLACE_MKTIME = @REPLACE_MKTIME@ +REPLACE_NANOSLEEP = @REPLACE_NANOSLEEP@ +REPLACE_NL_LANGINFO = @REPLACE_NL_LANGINFO@ +REPLACE_NULL = @REPLACE_NULL@ +REPLACE_OBSTACK_PRINTF = @REPLACE_OBSTACK_PRINTF@ +REPLACE_OPEN = @REPLACE_OPEN@ +REPLACE_OPENAT = @REPLACE_OPENAT@ +REPLACE_PERROR = @REPLACE_PERROR@ +REPLACE_POPEN = @REPLACE_POPEN@ +REPLACE_PREAD = @REPLACE_PREAD@ +REPLACE_PRINTF = @REPLACE_PRINTF@ +REPLACE_PTSNAME = @REPLACE_PTSNAME@ +REPLACE_PTSNAME_R = @REPLACE_PTSNAME_R@ +REPLACE_PUTENV = @REPLACE_PUTENV@ +REPLACE_PWRITE = @REPLACE_PWRITE@ +REPLACE_RANDOM_R = @REPLACE_RANDOM_R@ 
+REPLACE_READ = @REPLACE_READ@ +REPLACE_READLINK = @REPLACE_READLINK@ +REPLACE_REALLOC = @REPLACE_REALLOC@ +REPLACE_REALPATH = @REPLACE_REALPATH@ +REPLACE_REMOVE = @REPLACE_REMOVE@ +REPLACE_RENAME = @REPLACE_RENAME@ +REPLACE_RENAMEAT = @REPLACE_RENAMEAT@ +REPLACE_RMDIR = @REPLACE_RMDIR@ +REPLACE_SETENV = @REPLACE_SETENV@ +REPLACE_SETLOCALE = @REPLACE_SETLOCALE@ +REPLACE_SLEEP = @REPLACE_SLEEP@ +REPLACE_SNPRINTF = @REPLACE_SNPRINTF@ +REPLACE_SPRINTF = @REPLACE_SPRINTF@ +REPLACE_STAT = @REPLACE_STAT@ +REPLACE_STDIO_READ_FUNCS = @REPLACE_STDIO_READ_FUNCS@ +REPLACE_STDIO_WRITE_FUNCS = @REPLACE_STDIO_WRITE_FUNCS@ +REPLACE_STPNCPY = @REPLACE_STPNCPY@ +REPLACE_STRCASESTR = @REPLACE_STRCASESTR@ +REPLACE_STRCHRNUL = @REPLACE_STRCHRNUL@ +REPLACE_STRDUP = @REPLACE_STRDUP@ +REPLACE_STRERROR = @REPLACE_STRERROR@ +REPLACE_STRERROR_R = @REPLACE_STRERROR_R@ +REPLACE_STRNCAT = @REPLACE_STRNCAT@ +REPLACE_STRNDUP = @REPLACE_STRNDUP@ +REPLACE_STRNLEN = @REPLACE_STRNLEN@ +REPLACE_STRSIGNAL = @REPLACE_STRSIGNAL@ +REPLACE_STRSTR = @REPLACE_STRSTR@ +REPLACE_STRTOD = @REPLACE_STRTOD@ +REPLACE_STRTOK_R = @REPLACE_STRTOK_R@ +REPLACE_STRUCT_LCONV = @REPLACE_STRUCT_LCONV@ +REPLACE_STRUCT_TIMEVAL = @REPLACE_STRUCT_TIMEVAL@ +REPLACE_SYMLINK = @REPLACE_SYMLINK@ +REPLACE_TIMEGM = @REPLACE_TIMEGM@ +REPLACE_TMPFILE = @REPLACE_TMPFILE@ +REPLACE_TOWLOWER = @REPLACE_TOWLOWER@ +REPLACE_TTYNAME_R = @REPLACE_TTYNAME_R@ +REPLACE_UNLINK = @REPLACE_UNLINK@ +REPLACE_UNLINKAT = @REPLACE_UNLINKAT@ +REPLACE_UNSETENV = @REPLACE_UNSETENV@ +REPLACE_USLEEP = @REPLACE_USLEEP@ +REPLACE_UTIMENSAT = @REPLACE_UTIMENSAT@ +REPLACE_VASPRINTF = @REPLACE_VASPRINTF@ +REPLACE_VDPRINTF = @REPLACE_VDPRINTF@ +REPLACE_VFPRINTF = @REPLACE_VFPRINTF@ +REPLACE_VPRINTF = @REPLACE_VPRINTF@ +REPLACE_VSNPRINTF = @REPLACE_VSNPRINTF@ +REPLACE_VSPRINTF = @REPLACE_VSPRINTF@ +REPLACE_WCRTOMB = @REPLACE_WCRTOMB@ +REPLACE_WCSNRTOMBS = @REPLACE_WCSNRTOMBS@ +REPLACE_WCSRTOMBS = @REPLACE_WCSRTOMBS@ +REPLACE_WCSWIDTH = @REPLACE_WCSWIDTH@ 
+REPLACE_WCTOB = @REPLACE_WCTOB@ +REPLACE_WCTOMB = @REPLACE_WCTOMB@ +REPLACE_WCWIDTH = @REPLACE_WCWIDTH@ +REPLACE_WRITE = @REPLACE_WRITE@ +SED_FEATURE_VERSION = @SED_FEATURE_VERSION@ +SELINUX_CONTEXT_H = @SELINUX_CONTEXT_H@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SIG_ATOMIC_T_SUFFIX = @SIG_ATOMIC_T_SUFFIX@ +SIZE_T_SUFFIX = @SIZE_T_SUFFIX@ +STDARG_H = @STDARG_H@ +STDBOOL_H = @STDBOOL_H@ +STDDEF_H = @STDDEF_H@ +STDINT_H = @STDINT_H@ +STRIP = @STRIP@ +SYS_TIME_H_DEFINES_STRUCT_TIMESPEC = @SYS_TIME_H_DEFINES_STRUCT_TIMESPEC@ +TIME_H_DEFINES_STRUCT_TIMESPEC = @TIME_H_DEFINES_STRUCT_TIMESPEC@ +UNDEFINE_STRTOK_R = @UNDEFINE_STRTOK_R@ +UNISTD_H_HAVE_WINSOCK2_H = @UNISTD_H_HAVE_WINSOCK2_H@ +UNISTD_H_HAVE_WINSOCK2_H_AND_USE_SOCKETS = @UNISTD_H_HAVE_WINSOCK2_H_AND_USE_SOCKETS@ +USE_ACL = @USE_ACL@ +USE_NLS = @USE_NLS@ +VERSION = @VERSION@ +WCHAR_T_SUFFIX = @WCHAR_T_SUFFIX@ +WINDOWS_64_BIT_OFF_T = @WINDOWS_64_BIT_OFF_T@ +WINDOWS_64_BIT_ST_SIZE = @WINDOWS_64_BIT_ST_SIZE@ +WINT_T_SUFFIX = @WINT_T_SUFFIX@ +XFAIL_TESTS = @XFAIL_TESTS@ +XGETTEXT = @XGETTEXT@ +XGETTEXT_015 = @XGETTEXT_015@ +XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +gl_LIBOBJS = @gl_LIBOBJS@ +gl_LTLIBOBJS = @gl_LTLIBOBJS@ +gltests_LIBOBJS = @gltests_LIBOBJS@ +gltests_LTLIBOBJS = @gltests_LTLIBOBJS@ +gltests_WITNESS = @gltests_WITNESS@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = 
@host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +lispdir = @lispdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +info_TEXINFOS = sed.texi +sed_TEXINFOS = config.texi version.texi +dist_man_MANS = sed.1 +dist_noinst_DATA = sed.x sed-in.texi s-texi +dist_noinst_SCRIPTS = groupify.sed +TEXI2DVI = $(top_srcdir)/build-aux/texi2dvi --expand +HELP2MAN = $(top_srcdir)/build-aux/help2man +SED = $(top_builddir)/sed/sed +AM_MAKEINFOHTMLFLAGS = --no-split +all: all-am + +.SUFFIXES: +.SUFFIXES: .dvi .html .info .pdf .ps .texi +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnits doc/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnits doc/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +.texi.info: + restore=: && backupdir="$(am__leading_dot)am$$$$" && \ + am__cwd=`pwd` && $(am__cd) $(srcdir) && \ + rm -rf $$backupdir && mkdir $$backupdir && \ + if ($(MAKEINFO) --version) >/dev/null 2>&1; then \ + for f in $@ $@-[0-9] $@-[0-9][0-9] $(@:.info=).i[0-9] $(@:.info=).i[0-9][0-9]; do \ + if test -f $$f; then mv $$f $$backupdir; restore=mv; else :; fi; \ + done; \ + else :; fi && \ + cd "$$am__cwd"; \ + if $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) \ + -o $@ $<; \ + then \ + rc=0; \ + $(am__cd) $(srcdir); \ + else \ + rc=$$?; \ + $(am__cd) $(srcdir) && \ + $$restore $$backupdir/* `echo "./$@" | sed 's|[^/]*$$||'`; \ + fi; \ + rm -rf $$backupdir; exit $$rc + +.texi.dvi: + TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \ + MAKEINFO='$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir)' \ + $(TEXI2DVI) --clean $< + +.texi.pdf: + TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \ + MAKEINFO='$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir)' \ + $(TEXI2PDF) --clean $< + +.texi.html: + rm -rf $(@:.html=.htp) + if $(MAKEINFOHTML) $(AM_MAKEINFOHTMLFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) \ + -o $(@:.html=.htp) $<; \ + then \ + rm -rf $@; \ + if test ! 
-d $(@:.html=.htp) && test -d $(@:.html=); then \ + mv $(@:.html=) $@; else mv $(@:.html=.htp) $@; fi; \ + else \ + if test ! -d $(@:.html=.htp) && test -d $(@:.html=); then \ + rm -rf $(@:.html=); else rm -Rf $(@:.html=.htp) $@; fi; \ + exit 1; \ + fi +$(srcdir)/sed.info: sed.texi $(srcdir)/version.texi $(sed_TEXINFOS) +sed.dvi: sed.texi $(srcdir)/version.texi $(sed_TEXINFOS) +sed.pdf: sed.texi $(srcdir)/version.texi $(sed_TEXINFOS) +sed.html: sed.texi $(srcdir)/version.texi $(sed_TEXINFOS) +$(srcdir)/version.texi: $(srcdir)/stamp-vti +$(srcdir)/stamp-vti: sed.texi $(top_srcdir)/configure + @(dir=.; test -f ./sed.texi || dir=$(srcdir); \ + set `$(SHELL) $(top_srcdir)/build-aux/mdate-sh $$dir/sed.texi`; \ + echo "@set UPDATED $$1 $$2 $$3"; \ + echo "@set UPDATED-MONTH $$2 $$3"; \ + echo "@set EDITION $(VERSION)"; \ + echo "@set VERSION $(VERSION)") > vti.tmp + @cmp -s vti.tmp $(srcdir)/version.texi \ + || (echo "Updating $(srcdir)/version.texi"; \ + cp vti.tmp $(srcdir)/version.texi) + -@rm -f vti.tmp + @cp $(srcdir)/version.texi $@ + +mostlyclean-vti: + -rm -f vti.tmp + +maintainer-clean-vti: + -rm -f $(srcdir)/stamp-vti $(srcdir)/version.texi +.dvi.ps: + TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \ + $(DVIPS) -o $@ $< + +uninstall-dvi-am: + @$(NORMAL_UNINSTALL) + @list='$(DVIS)'; test -n "$(dvidir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(dvidir)/$$f'"; \ + rm -f "$(DESTDIR)$(dvidir)/$$f"; \ + done + +uninstall-html-am: + @$(NORMAL_UNINSTALL) + @list='$(HTMLS)'; test -n "$(htmldir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " rm -rf '$(DESTDIR)$(htmldir)/$$f'"; \ + rm -rf "$(DESTDIR)$(htmldir)/$$f"; \ + done + +uninstall-info-am: + @$(PRE_UNINSTALL) + @if test -d '$(DESTDIR)$(infodir)' && $(am__can_run_installinfo); then \ + list='$(INFO_DEPS)'; \ + for file in $$list; do \ + relfile=`echo "$$file" | sed 's|^.*/||'`; \ + echo " install-info --info-dir='$(DESTDIR)$(infodir)' 
--remove '$(DESTDIR)$(infodir)/$$relfile'"; \ + if install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$$relfile"; \ + then :; else test ! -f "$(DESTDIR)$(infodir)/$$relfile" || exit 1; fi; \ + done; \ + else :; fi + @$(NORMAL_UNINSTALL) + @list='$(INFO_DEPS)'; \ + for file in $$list; do \ + relfile=`echo "$$file" | sed 's|^.*/||'`; \ + relfile_i=`echo "$$relfile" | sed 's|\.info$$||;s|$$|.i|'`; \ + (if test -d "$(DESTDIR)$(infodir)" && cd "$(DESTDIR)$(infodir)"; then \ + echo " cd '$(DESTDIR)$(infodir)' && rm -f $$relfile $$relfile-[0-9] $$relfile-[0-9][0-9] $$relfile_i[0-9] $$relfile_i[0-9][0-9]"; \ + rm -f $$relfile $$relfile-[0-9] $$relfile-[0-9][0-9] $$relfile_i[0-9] $$relfile_i[0-9][0-9]; \ + else :; fi); \ + done + +uninstall-pdf-am: + @$(NORMAL_UNINSTALL) + @list='$(PDFS)'; test -n "$(pdfdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(pdfdir)/$$f'"; \ + rm -f "$(DESTDIR)$(pdfdir)/$$f"; \ + done + +uninstall-ps-am: + @$(NORMAL_UNINSTALL) + @list='$(PSS)'; test -n "$(psdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(psdir)/$$f'"; \ + rm -f "$(DESTDIR)$(psdir)/$$f"; \ + done + +dist-info: $(INFO_DEPS) + @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + list='$(INFO_DEPS)'; \ + for base in $$list; do \ + case $$base in \ + $(srcdir)/*) base=`echo "$$base" | sed "s|^$$srcdirstrip/||"`;; \ + esac; \ + if test -f $$base; then d=.; else d=$(srcdir); fi; \ + base_i=`echo "$$base" | sed 's|\.info$$||;s|$$|.i|'`; \ + for file in $$d/$$base $$d/$$base-[0-9] $$d/$$base-[0-9][0-9] $$d/$$base_i[0-9] $$d/$$base_i[0-9][0-9]; do \ + if test -f $$file; then \ + relfile=`expr "$$file" : "$$d/\(.*\)"`; \ + test -f "$(distdir)/$$relfile" || \ + cp -p $$file "$(distdir)/$$relfile"; \ + else :; fi; \ + done; \ + done + +mostlyclean-aminfo: + -rm -rf sed.aux sed.cp sed.cps sed.fn sed.fns sed.ky sed.log sed.op sed.pg \ + sed.tmp sed.toc sed.tp sed.vr sed.vrs + 
+clean-aminfo: + -test -z "sed.dvi sed.pdf sed.ps sed.html" \ + || rm -rf sed.dvi sed.pdf sed.ps sed.html + +maintainer-clean-aminfo: + @list='$(INFO_DEPS)'; for i in $$list; do \ + i_i=`echo "$$i" | sed 's|\.info$$||;s|$$|.i|'`; \ + echo " rm -f $$i $$i-[0-9] $$i-[0-9][0-9] $$i_i[0-9] $$i_i[0-9][0-9]"; \ + rm -f $$i $$i-[0-9] $$i-[0-9][0-9] $$i_i[0-9] $$i_i[0-9][0-9]; \ + done +install-man1: $(dist_man_MANS) + @$(NORMAL_INSTALL) + @list1=''; \ + list2='$(dist_man_MANS)'; \ + test -n "$(man1dir)" \ + && test -n "`echo $$list1$$list2`" \ + || exit 0; \ + echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \ + { for i in $$list1; do echo "$$i"; done; \ + if test -n "$$list2"; then \ + for i in $$list2; do echo "$$i"; done \ + | sed -n '/\.1[a-z]*$$/p'; \ + fi; \ + } | while read p; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ + while read files; do \ + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \ + done; } + +uninstall-man1: + @$(NORMAL_UNINSTALL) + @list=''; test -n "$(man1dir)" || exit 0; \ + files=`{ for i in $$list; do echo "$$i"; done; \ + l2='$(dist_man_MANS)'; for i in $$l2; do echo "$$i"; done | \ + sed -n '/\.1[a-z]*$$/p'; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir) +tags: 
TAGS +TAGS: + +ctags: CTAGS +CTAGS: + +cscope cscopelist: + + +distdir: $(DISTFILES) + @list='$(MANS)'; if test -n "$$list"; then \ + list=`for p in $$list; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + if test -f "$$d$$p"; then echo "$$d$$p"; else :; fi; done`; \ + if test -n "$$list" && \ + grep 'ab help2man is required to generate this page' $$list >/dev/null; then \ + echo "error: found man pages containing the 'missing help2man' replacement text:" >&2; \ + grep -l 'ab help2man is required to generate this page' $$list | sed 's/^/ /' >&2; \ + echo " to fix them, install help2man, remove and regenerate the man pages;" >&2; \ + echo " typically 'make maintainer-clean' will remove them" >&2; \ + exit 1; \ + else :; fi; \ + else :; fi + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$(top_distdir)" distdir="$(distdir)" \ + dist-info dist-hook +check-am: all-am +check: check-am +all-am: Makefile $(INFO_DEPS) $(SCRIPTS) $(MANS) $(DATA) +installdirs: + for dir in "$(DESTDIR)$(infodir)" "$(DESTDIR)$(man1dir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-aminfo clean-generic mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: $(DVIS) + +html: html-am + +html-am: $(HTMLS) + +info: info-am + +info-am: $(INFO_DEPS) + +install-data-am: install-info-am install-man + +install-dvi: install-dvi-am + +install-dvi-am: $(DVIS) + @$(NORMAL_INSTALL) + @list='$(DVIS)'; test -n "$(dvidir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(dvidir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(dvidir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(dvidir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(dvidir)" || exit $$?; \ + done +install-exec-am: + +install-html: install-html-am + +install-html-am: $(HTMLS) + @$(NORMAL_INSTALL) + @list='$(HTMLS)'; list2=; test -n "$(htmldir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(htmldir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p" || test -d "$$p"; then d=; else d="$(srcdir)/"; fi; \ + $(am__strip_dir) \ + d2=$$d$$p; \ + if test -d "$$d2"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)/$$f'"; \ + $(MKDIR_P) "$(DESTDIR)$(htmldir)/$$f" || exit 1; \ + echo " $(INSTALL_DATA) '$$d2'/* '$(DESTDIR)$(htmldir)/$$f'"; \ + $(INSTALL_DATA) "$$d2"/* "$(DESTDIR)$(htmldir)/$$f" || exit $$?; \ + else \ + list2="$$list2 $$d2"; \ + fi; \ + done; \ + test -z "$$list2" || { echo "$$list2" | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(htmldir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(htmldir)" || exit $$?; \ + done; } +install-info: install-info-am + +install-info-am: $(INFO_DEPS) + @$(NORMAL_INSTALL) + @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + list='$(INFO_DEPS)'; 
test -n "$(infodir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(infodir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(infodir)" || exit 1; \ + fi; \ + for file in $$list; do \ + case $$file in \ + $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \ + esac; \ + if test -f $$file; then d=.; else d=$(srcdir); fi; \ + file_i=`echo "$$file" | sed 's|\.info$$||;s|$$|.i|'`; \ + for ifile in $$d/$$file $$d/$$file-[0-9] $$d/$$file-[0-9][0-9] \ + $$d/$$file_i[0-9] $$d/$$file_i[0-9][0-9] ; do \ + if test -f $$ifile; then \ + echo "$$ifile"; \ + else : ; fi; \ + done; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(infodir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(infodir)" || exit $$?; done + @$(POST_INSTALL) + @if $(am__can_run_installinfo); then \ + list='$(INFO_DEPS)'; test -n "$(infodir)" || list=; \ + for file in $$list; do \ + relfile=`echo "$$file" | sed 's|^.*/||'`; \ + echo " install-info --info-dir='$(DESTDIR)$(infodir)' '$(DESTDIR)$(infodir)/$$relfile'";\ + install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$$relfile" || :;\ + done; \ + else : ; fi +install-man: install-man1 + +install-pdf: install-pdf-am + +install-pdf-am: $(PDFS) + @$(NORMAL_INSTALL) + @list='$(PDFS)'; test -n "$(pdfdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pdfdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pdfdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pdfdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(pdfdir)" || exit $$?; done +install-ps: install-ps-am + +install-ps-am: $(PSS) + @$(NORMAL_INSTALL) + @list='$(PSS)'; test -n "$(psdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(psdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(psdir)" || exit 1; \ + fi; \ + for 
p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(psdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(psdir)" || exit $$?; done +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-aminfo \ + maintainer-clean-generic maintainer-clean-vti + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-aminfo mostlyclean-generic mostlyclean-vti + +pdf: pdf-am + +pdf-am: $(PDFS) + +ps: ps-am + +ps-am: $(PSS) + +uninstall-am: uninstall-dvi-am uninstall-html-am uninstall-info-am \ + uninstall-man uninstall-pdf-am uninstall-ps-am + +uninstall-man: uninstall-man1 + +.MAKE: install-am install-strip + +.PHONY: all all-am check check-am clean clean-aminfo clean-generic \ + dist-hook dist-info distclean distclean-generic distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-man1 \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-aminfo \ + maintainer-clean-generic maintainer-clean-vti mostlyclean \ + mostlyclean-aminfo mostlyclean-generic mostlyclean-vti pdf \ + pdf-am ps ps-am uninstall uninstall-am uninstall-dvi-am \ + uninstall-html-am uninstall-info-am uninstall-man \ + uninstall-man1 uninstall-pdf-am uninstall-ps-am + + +# To produce better quality output, in the example sed +# scripts we group comments with lines following them; +# since mantaining the "@group...@end group" manually +# is a burden, we do this automatically +$(srcdir)/sed.texi: $(srcdir)/s-texi +$(srcdir)/s-texi: sed-in.texi $(srcdir)/groupify.sed + sed -nf $(srcdir)/groupify.sed \ + < 
$(srcdir)/sed-in.texi > $(srcdir)/sed-tmp.texi + if cmp $(srcdir)/sed.texi $(srcdir)/sed-tmp.texi; then \ + rm -f $(srcdir)/sed-tmp.texi; \ + else \ + mv -f $(srcdir)/sed-tmp.texi $(srcdir)/sed.texi; \ + fi + echo stamp > $(srcdir)/s-texi + +sed.1: $(top_srcdir)/sed/sed.c $(top_srcdir)/configure.ac $(srcdir)/sed.x + $(HELP2MAN) --name "stream editor for filtering and transforming text" \ + -p sed --include $(srcdir)/sed.x -o $(srcdir)/sed.1 $(SED) + +dist-hook: + touch $(distdir)/sed.1 + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/doc/config.texi b/doc/config.texi new file mode 100644 index 0000000..aa5e35a --- /dev/null +++ b/doc/config.texi @@ -0,0 +1,9 @@ +@dircategory Text creation and manipulation +@direntry +* sed: (sed). Stream EDitor. + +@end direntry + +@clear PERL +@set SSEDEXT @acronym{GNU} extensions +@set SSED @acronym{GNU} @command{sed} diff --git a/doc/groupify.sed b/doc/groupify.sed new file mode 100755 index 0000000..2430710 --- /dev/null +++ b/doc/groupify.sed @@ -0,0 +1,59 @@ +#! /bin/sed -nf +# Script to add @group...@end group tags to sed.texi.in +# so that comments are not separated from the instructions +# that they refer to. + +# Step 1: search for the conventional "@c start----" comment +1a\ +@c Do not edit this file!! It is automatically generated from sed-in.texi. +p +/^@c start-*$/! b + +# Step 2: loop until we find a @ command +:a +n +p +/^@/! ba + +# Step 3: process everything until a "@end" command + +# Step 3.1: Print the blank lines before the group. If we reach the "@end", +# we go back to step 1. +:b +n +/^@end/ { + p + b +} +/^[ ]*$/ { + p + bb +} + +# Step 3.2: Add to hold space every line until an empty one or "@end" +h +:c +n +/^@end example/! { + /^[ ]*$/! { + H + bc + } +} + +# Step 3.3: Working in hold space, add @group...@end group if there are +# at least two lines. 
Then print the lines we processed and +# switch back to pattern space. +x +/\n/ { + s/.*/@group\ +&\ +@end group/ +} +p + +# Step 3.4: Switch back to pattern space, print the first blank line +# and possibly go back to step 3.1 +x +p +/^@end/ !bb diff --git a/doc/s-texi b/doc/s-texi new file mode 100644 index 0000000..859afb1 --- /dev/null +++ b/doc/s-texi @@ -0,0 +1 @@ +stamp diff --git a/doc/sed-in.texi b/doc/sed-in.texi new file mode 100644 index 0000000..bf5158c --- /dev/null +++ b/doc/sed-in.texi @@ -0,0 +1,4187 @@ +\input texinfo @c -*-texinfo-*- +@c +@c -- Stuff that needs adding: ---------------------------------------------- +@c (nothing!) +@c -------------------------------------------------------------------------- +@c Check for consistency: regexps in @code, text that they match in @samp. +@c +@c Tips: +@c @command for command +@c @samp for command fragments: @samp{cat -s} +@c @code for sed commands and flags +@c Use ``quote'' not `quote' or "quote". +@c +@c %**start of header +@setfilename sed.info +@settitle sed, a stream editor +@c %**end of header + +@c @smallbook + +@include version.texi + +@c Combine indices. +@syncodeindex ky cp +@syncodeindex pg cp +@syncodeindex tp cp + +@defcodeindex op +@syncodeindex op fn + +@include config.texi + +@copying +This file documents version @value{VERSION} of +@value{SSED}, a stream editor. + +Copyright @copyright{} 1998, 1999, 2001, 2002, 2003, 2004 Free +Software Foundation, Inc. + +This document is released under the terms of the @acronym{GNU} Free +Documentation License as published by the Free Software Foundation; +either version 1.1, or (at your option) any later version. + +You should have received a copy of the @acronym{GNU} Free Documentation +License along with @value{SSED}; see the file @file{COPYING.DOC}. +If not, write to the Free Software Foundation, 59 Temple Place - Suite +330, Boston, MA 02110-1301, USA. 
+ +There are no Cover Texts and no Invariant Sections; this text, along +with its equivalent in the printed manual, constitutes the Title Page. +@end copying + +@setchapternewpage off + +@titlepage +@title @command{sed}, a stream editor +@subtitle version @value{VERSION}, @value{UPDATED} +@author by Ken Pizzini, Paolo Bonzini + +@page +@vskip 0pt plus 1filll +Copyright @copyright{} 1998, 1999 Free Software Foundation, Inc. + +@insertcopying + +Published by the Free Software Foundation, @* +51 Franklin Street, Fifth Floor @* +Boston, MA 02110-1301, USA +@end titlepage + + +@node Top +@top + +@ifnottex +@insertcopying +@end ifnottex + +@menu +* Introduction:: Introduction +* Invoking sed:: Invocation +* sed Programs:: @command{sed} programs +* Examples:: Some sample scripts +* Limitations:: Limitations and (non-)limitations of @value{SSED} +* Other Resources:: Other resources for learning about @command{sed} +* Reporting Bugs:: Reporting bugs + +* Extended regexps:: @command{egrep}-style regular expressions +@ifset PERL +* Perl regexps:: Perl-style regular expressions +@end ifset + +* Concept Index:: A menu with all the topics in this manual. +* Command and Option Index:: A menu with all @command{sed} commands and + command-line options. 
+ +@detailmenu +--- The detailed node listing --- + +sed Programs: +* Execution Cycle:: How @command{sed} works +* Addresses:: Selecting lines with @command{sed} +* Regular Expressions:: Overview of regular expression syntax +* Common Commands:: Often used commands +* The "s" Command:: @command{sed}'s Swiss Army Knife +* Other Commands:: Less frequently used commands +* Programming Commands:: Commands for @command{sed} gurus +* Extended Commands:: Commands specific to @value{SSED} +* Escapes:: Specifying special characters + +Examples: +* Centering lines:: +* Increment a number:: +* Rename files to lower case:: +* Print bash environment:: +* Reverse chars of lines:: +* tac:: Reverse lines of files +* cat -n:: Numbering lines +* cat -b:: Numbering non-blank lines +* wc -c:: Counting chars +* wc -w:: Counting words +* wc -l:: Counting lines +* head:: Printing the first lines +* tail:: Printing the last lines +* uniq:: Make duplicate lines unique +* uniq -d:: Print duplicated lines of input +* uniq -u:: Remove all duplicated lines +* cat -s:: Squeezing blank lines + +@ifset PERL +Perl regexps:: Perl-style regular expressions +* Backslash:: Introduces special sequences +* Circumflex/dollar sign/period:: Behave specially with regard to new lines +* Square brackets:: Are a bit different in strange cases +* Options setting:: Toggle modifiers in the middle of a regexp +* Non-capturing subpatterns:: Are not counted when backreferencing +* Repetition:: Allows for non-greedy matching +* Backreferences:: Allows for more than 10 back references +* Assertions:: Allows for complex look ahead matches +* Non-backtracking subpatterns:: Often gives more performance +* Conditional subpatterns:: Allows if/then/else branches +* Recursive patterns:: For example to match parentheses +* Comments:: Because things can get complex... +@end ifset + +@end detailmenu +@end menu + + +@node Introduction +@chapter Introduction + +@cindex Stream editor +@command{sed} is a stream editor. 
+A stream editor is used to perform basic text +transformations on an input stream +(a file or input from a pipeline). +While in some ways similar to an editor which +permits scripted edits (such as @command{ed}), +@command{sed} works by making only one pass over the +input(s), and is consequently more efficient. +But it is @command{sed}'s ability to filter text in a pipeline +which particularly distinguishes it from other types of +editors. + + +@node Invoking sed +@chapter Invocation + +Normally @command{sed} is invoked like this: + +@example +sed SCRIPT INPUTFILE... +@end example + +The full format for invoking @command{sed} is: + +@example +sed OPTIONS... [SCRIPT] [INPUTFILE...] +@end example + +If you do not specify @var{INPUTFILE}, or if @var{INPUTFILE} is @file{-}, +@command{sed} filters the contents of the standard input. The @var{script} +is actually the first non-option parameter, which @command{sed} specially +considers a script and not an input file if (and only if) none of the +other @var{options} specifies a script to be executed, that is if neither +of the @option{-e} and @option{-f} options is specified. + +@command{sed} may be invoked with the following command-line options: + +@table @code +@item --version +@opindex --version +@cindex Version, printing +Print out the version of @command{sed} that is being run and a copyright notice, +then exit. + +@item --help +@opindex --help +@cindex Usage summary, printing +Print a usage message briefly summarizing these command-line options +and the bug-reporting address, +then exit. + +@item -n +@itemx --quiet +@itemx --silent +@opindex -n +@opindex --quiet +@opindex --silent +@cindex Disabling autoprint, from command line +By default, @command{sed} prints out the pattern space +at the end of each cycle through the script (@pxref{Execution Cycle, , +How @code{sed} works}). +These options disable this automatic printing, +and @command{sed} only produces output when explicitly told to +via the @code{p} command. 
+ +@item -e @var{script} +@itemx --expression=@var{script} +@opindex -e +@opindex --expression +@cindex Script, from command line +Add the commands in @var{script} to the set of commands to be +run while processing the input. + +@item -f @var{script-file} +@itemx --file=@var{script-file} +@opindex -f +@opindex --file +@cindex Script, from a file +Add the commands contained in the file @var{script-file} +to the set of commands to be run while processing the input. + +@item -i[@var{SUFFIX}] +@itemx --in-place[=@var{SUFFIX}] +@opindex -i +@opindex --in-place +@cindex In-place editing, activating +@cindex @value{SSEDEXT}, in-place editing +This option specifies that files are to be edited in-place. +@value{SSED} does this by creating a temporary file and +sending output to this file rather than to the standard +output.@footnote{This applies to commands such as @code{=}, +@code{a}, @code{c}, @code{i}, @code{l}, @code{p}. You can +still write to the standard output by using the @code{w} +@cindex @value{SSEDEXT}, @file{/dev/stdout} file +or @code{W} commands together with the @file{/dev/stdout} +special file}. + +This option implies @option{-s}. + +When the end of the file is reached, the temporary file is +renamed to the output file's original name. The extension, +if supplied, is used to modify the name of the old file +before renaming the temporary file, thereby making a backup +copy@footnote{Note that @value{SSED} creates the backup +file whether or not any output is actually changed.}. + +@cindex In-place editing, Perl-style backup file names +This rule is followed: if the extension doesn't contain a @code{*}, +then it is appended to the end of the current filename as a +suffix; if the extension does contain one or more @code{*} +characters, then @emph{each} asterisk is replaced with the +current filename. 
This allows you to add a prefix to the +backup file, instead of (or in addition to) a suffix, or +even to place backup copies of the original files into another +directory (provided the directory already exists). + +If no extension is supplied, the original file is +overwritten without making a backup. + +@item -l @var{N} +@itemx --line-length=@var{N} +@opindex -l +@opindex --line-length +@cindex Line length, setting +Specify the default line-wrap length for the @code{l} command. +A length of 0 (zero) means to never wrap long lines. If +not specified, it is taken to be 70. + +@item --posix +@opindex --posix +@cindex @value{SSEDEXT}, disabling +@value{SSED} includes several extensions to @acronym{POSIX} +sed. In order to simplify writing portable scripts, this +option disables all the extensions that this manual documents, +including additional commands. +@cindex @code{POSIXLY_CORRECT} behavior, enabling +Most of the extensions accept @command{sed} programs that +are outside the syntax mandated by @acronym{POSIX}, but some +of them (such as the behavior of the @command{N} command +described in @pxref{Reporting Bugs}) actually violate the +standard. If you want to disable only the latter kind of +extension, you can set the @code{POSIXLY_CORRECT} variable +to a non-empty value. + +@item -b +@itemx --binary +@opindex -b +@opindex --binary +This option is available on every platform, but is only effective where the +operating system makes a distinction between text files and binary files. +When such a distinction is made---as is the case for MS-DOS, Windows, +Cygwin---text files are composed of lines separated by a carriage return +@emph{and} a line feed character, and @command{sed} does not see the +ending CR. When this option is specified, @command{sed} will open +input files in binary mode, thus not requesting this special processing +and considering lines to end at a line feed. 
+ +@item --follow-symlinks +@opindex --follow-symlinks +This option is available only on platforms that support +symbolic links and has an effect only if option @option{-i} +is specified. In this case, if the file that is specified +on the command line is a symbolic link, @command{sed} will +follow the link and edit the ultimate destination of the +link. The default behavior is to break the symbolic link, +so that the link destination will not be modified. + +@item -r +@itemx --regexp-extended +@opindex -r +@opindex --regexp-extended +@cindex Extended regular expressions, choosing +@cindex @acronym{GNU} extensions, extended regular expressions +Use extended regular expressions rather than basic +regular expressions. Extended regexps are those that +@command{egrep} accepts; they can be clearer because they +usually have fewer backslashes, but are a @acronym{GNU} extension +and hence scripts that use them are not portable. +@xref{Extended regexps, , Extended regular expressions}. + +@ifset PERL +@item -R +@itemx --regexp-perl +@opindex -R +@opindex --regexp-perl +@cindex Perl-style regular expressions, choosing +@cindex @value{SSEDEXT}, Perl-style regular expressions +Use Perl-style regular expressions rather than basic +regular expressions. Perl-style regexps are extremely +powerful but are a @value{SSED} extension and hence scripts that +use them are not portable. @xref{Perl regexps, , +Perl-style regular expressions}. +@end ifset + +@item -s +@itemx --separate +@opindex -s +@opindex --separate +@cindex Working on separate files +By default, @command{sed} will consider the files specified on the +command line as a single continuous long stream.
This @value{SSED} +extension allows the user to consider them as separate files: +range addresses (such as @samp{/abc/,/def/}) are not allowed +to span several files, line numbers are relative to the start +of each file, @code{$} refers to the last line of each file, +and files invoked from the @code{R} commands are rewound at the +start of each file. + +@item -u +@itemx --unbuffered +@opindex -u +@opindex --unbuffered +@cindex Unbuffered I/O, choosing +Buffer both input and output as minimally as practical. +(This is particularly useful if the input is coming from +the likes of @samp{tail -f}, and you wish to see the transformed +output as soon as possible.) + +@item -z +@itemx --null-data +@itemx --zero-terminated +@opindex -z +@opindex --null-data +@opindex --zero-terminated +Treat the input as a set of lines, each terminated by a zero byte +(the ASCII @samp{NUL} character) instead of a newline. This option can +be used with commands like @samp{sort -z} and @samp{find -print0} +to process arbitrary file names. +@end table + +If no @option{-e}, @option{-f}, @option{--expression}, or @option{--file} +options are given on the command-line, +then the first non-option argument on the command line is +taken to be the @var{script} to be executed. + +@cindex Files to be processed as input +If any command-line parameters remain after processing the above, +these parameters are interpreted as the names of input files to +be processed. +@cindex Standard input, processing as input +A file name of @samp{-} refers to the standard input stream. +The standard input will be processed if no file names are specified. 
+ + +@node sed Programs +@chapter @command{sed} Programs + +@cindex @command{sed} program structure +@cindex Script structure +A @command{sed} program consists of one or more @command{sed} commands, +passed in by one or more of the +@option{-e}, @option{-f}, @option{--expression}, and @option{--file} +options, or the first non-option argument if zero of these +options are used. +This document will refer to ``the'' @command{sed} script; +this is understood to mean the in-order catenation +of all of the @var{script}s and @var{script-file}s passed in. + +Commands within a @var{script} or @var{script-file} can be +separated by semicolons (@code{;}) or newlines (ASCII 10). +Some commands, due to their syntax, cannot be followed by semicolons +working as command separators and thus should be terminated +with newlines or be placed at the end of a @var{script} or @var{script-file}. +Commands can also be preceded with optional non-significant +whitespace characters. + +Each @code{sed} command consists of an optional address or +address range, followed by a one-character command name +and any additional command-specific code. + +@menu +* Execution Cycle:: How @command{sed} works +* Addresses:: Selecting lines with @command{sed} +* Regular Expressions:: Overview of regular expression syntax +* Common Commands:: Often used commands +* The "s" Command:: @command{sed}'s Swiss Army Knife +* Other Commands:: Less frequently used commands +* Programming Commands:: Commands for @command{sed} gurus +* Extended Commands:: Commands specific to @value{SSED} +* Escapes:: Specifying special characters +@end menu + + +@node Execution Cycle +@section How @command{sed} Works + +@cindex Buffer spaces, pattern and hold +@cindex Spaces, pattern and hold +@cindex Pattern space, definition +@cindex Hold space, definition +@command{sed} maintains two data buffers: the active @emph{pattern} space, +and the auxiliary @emph{hold} space. Both are initially empty.
+ +@command{sed} operates by performing the following cycle on each +line of input: first, @command{sed} reads one line from the input +stream, removes any trailing newline, and places it in the pattern space. +Then commands are executed; each command can have an address associated +with it: addresses are a kind of condition code, and a command is only +executed if the condition is verified before the command is to be +executed. + +When the end of the script is reached, unless the @option{-n} option +is in use, the contents of pattern space are printed out to the output +stream, adding back the trailing newline if it was removed.@footnote{Actually, +if @command{sed} prints a line without the terminating newline, it will +nevertheless print the missing newline as soon as more text is sent to +the same output stream, which gives the ``least expected surprise'' +even though it does not make commands like @samp{sed -n p} exactly +identical to @command{cat}.} Then the next cycle starts for the next +input line. + +Unless special commands (like @samp{D}) are used, the pattern space is +deleted between two cycles. The hold space, on the other hand, keeps +its data between cycles (see commands @samp{h}, @samp{H}, @samp{x}, +@samp{g}, @samp{G} to move data between both buffers). + + +@node Addresses +@section Selecting lines with @command{sed} +@cindex Addresses, in @command{sed} scripts +@cindex Line selection +@cindex Selecting lines to process + +Addresses in a @command{sed} script can be in any of the following forms: +@table @code +@item @var{number} +@cindex Address, numeric +@cindex Line, selecting by number +Specifying a line number will match only that line in the input. +(Note that @command{sed} counts lines continuously across all input files +unless @option{-i} or @option{-s} options are specified.)
+ +@item @var{first}~@var{step} +@cindex @acronym{GNU} extensions, @samp{@var{n}~@var{m}} addresses +This @acronym{GNU} extension matches every @var{step}th line +starting with line @var{first}. +In particular, lines will be selected when there exists +a non-negative @var{n} such that the current line-number equals +@var{first} + (@var{n} * @var{step}). +Thus, to select the odd-numbered lines, +one would use @code{1~2}; +to pick every third line starting with the second, @samp{2~3} would be used; +to pick every fifth line starting with the tenth, use @samp{10~5}; +and @samp{50~0} is just an obscure way of saying @code{50}. + +@item $ +@cindex Address, last line +@cindex Last line, selecting +@cindex Line, selecting last +This address matches the last line of the last file of input, or +the last line of each file when the @option{-i} or @option{-s} options +are specified. + +@item /@var{regexp}/ +@cindex Address, as a regular expression +@cindex Line, selecting by regular expression match +This will select any line which matches the regular expression @var{regexp}. +If @var{regexp} itself includes any @code{/} characters, +each must be escaped by a backslash (@code{\}). + +@cindex empty regular expression +@cindex @value{SSEDEXT}, modifiers and the empty regular expression +The empty regular expression @samp{//} repeats the last regular +expression match (the same holds if the empty regular expression is +passed to the @code{s} command). Note that modifiers to regular expressions +are evaluated when the regular expression is compiled, thus it is invalid to +specify them together with the empty regular expression. + +@item \%@var{regexp}% +(The @code{%} may be replaced by any other single character.) + +@cindex Slash character, in regular expressions +This also matches the regular expression @var{regexp}, +but allows one to use a different delimiter than @code{/}. 
+This is particularly useful if the @var{regexp} itself contains +a lot of slashes, since it avoids the tedious escaping of every @code{/}. +If @var{regexp} itself includes any delimiter characters, +each must be escaped by a backslash (@code{\}). + +@item /@var{regexp}/I +@itemx \%@var{regexp}%I +@cindex @acronym{GNU} extensions, @code{I} modifier +@ifset PERL +@cindex Perl-style regular expressions, case-insensitive +@end ifset +The @code{I} modifier to regular-expression matching is a @acronym{GNU} +extension which causes the @var{regexp} to be matched in +a case-insensitive manner. + +@item /@var{regexp}/M +@itemx \%@var{regexp}%M +@cindex @value{SSEDEXT}, @code{M} modifier +@ifset PERL +@cindex Perl-style regular expressions, multiline +@end ifset +The @code{M} modifier to regular-expression matching is a @value{SSED} +extension which directs @value{SSED} to match the regular expression +in @cite{multi-line} mode. The modifier causes @code{^} and @code{$} to +match respectively (in addition to the normal behavior) the empty string +after a newline, and the empty string before a newline. There are +special character sequences +@ifset PERL +(@code{\A} and @code{\Z} in Perl mode, @code{\`} and @code{\'} +in basic or extended regular expression modes) +@end ifset +@ifclear PERL +(@code{\`} and @code{\'}) +@end ifclear +which always match the beginning or the end of the buffer. +In addition, +@ifset PERL +just like in Perl mode without the @code{S} modifier, +@end ifset +the period character does not match a new-line character in +multi-line mode. + +@ifset PERL +@item /@var{regexp}/S +@itemx \%@var{regexp}%S +@cindex @value{SSEDEXT}, @code{S} modifier +@cindex Perl-style regular expressions, single line +The @code{S} modifier to regular-expression matching is only valid +in Perl mode and specifies that the dot character (@code{.}) will +match the newline character too. @code{S} stands for @cite{single-line}. 
+@end ifset + +@ifset PERL +@item /@var{regexp}/X +@itemx \%@var{regexp}%X +@cindex @value{SSEDEXT}, @code{X} modifier +@cindex Perl-style regular expressions, extended +The @code{X} modifier to regular-expression matching is also +valid in Perl mode only. If it is used, whitespace in the +pattern (other than in a character class) and +characters between a @kbd{#} outside a character class and the +next newline character are ignored. An escaping backslash +can be used to include a whitespace or @kbd{#} character as part +of the pattern. +@end ifset +@end table + +If no addresses are given, then all lines are matched; +if one address is given, then only lines matching that +address are matched. + +@cindex Range of lines +@cindex Several lines, selecting +An address range can be specified by specifying two addresses +separated by a comma (@code{,}). An address range matches lines +starting from where the first address matches, and continues +until the second address matches (inclusively). + +If the second address is a @var{regexp}, then checking for the +ending match will start with the line @emph{following} the +line which matched the first address: a range will always +span at least two lines (except of course if the input stream +ends). + +If the second address is a @var{number} less than (or equal to) +the line matching the first address, then only the one line is +matched. 
+ +@cindex Special addressing forms +@cindex Range with start address of zero +@cindex Zero, as range start address +@cindex @var{addr1},+N +@cindex @var{addr1},~N +@cindex @acronym{GNU} extensions, special two-address forms +@cindex @acronym{GNU} extensions, @code{0} address +@cindex @acronym{GNU} extensions, 0,@var{addr2} addressing +@cindex @acronym{GNU} extensions, @var{addr1},+@var{N} addressing +@cindex @acronym{GNU} extensions, @var{addr1},~@var{N} addressing +@value{SSED} also supports some special two-address forms; all these +are @acronym{GNU} extensions: +@table @code +@item 0,/@var{regexp}/ +A line number of @code{0} can be used in an address specification like +@code{0,/@var{regexp}/} so that @command{sed} will try to match +@var{regexp} in the first input line too. In other words, +@code{0,/@var{regexp}/} is similar to @code{1,/@var{regexp}/}, +except that if @var{addr2} matches the very first line of input the +@code{0,/@var{regexp}/} form will consider it to end the range, whereas +the @code{1,/@var{regexp}/} form will match the beginning of its range and +hence make the range span up to the @emph{second} occurrence of the +regular expression. + +Note that this is the only place where the @code{0} address makes +sense; there is no 0-th line and commands which are given the @code{0} +address in any other way will give an error. + +@item @var{addr1},+@var{N} +Matches @var{addr1} and the @var{N} lines following @var{addr1}. + +@item @var{addr1},~@var{N} +Matches @var{addr1} and the lines following @var{addr1} +until the next line whose input line number is a multiple of @var{N}. +@end table + +@cindex Excluding lines +@cindex Selecting non-matching lines +Appending the @code{!} character to the end of an address +specification negates the sense of the match. +That is, if the @code{!} character follows an address range, +then only lines which do @emph{not} match the address range +will be selected. 
+This also works for singleton addresses, +and, perhaps perversely, for the null address. + + +@node Regular Expressions +@section Overview of Regular Expression Syntax + +To know how to use @command{sed}, people should understand regular +expressions (@dfn{regexp} for short). A regular expression +is a pattern that is matched against a +subject string from left to right. Most characters are +@dfn{ordinary}: they stand for +themselves in a pattern, and match the corresponding characters +in the subject. As a trivial example, the pattern + +@example +The quick brown fox +@end example + +@noindent +matches a portion of a subject string that is identical to +itself. The power of regular expressions comes from the +ability to include alternatives and repetitions in the pattern. +These are encoded in the pattern by the use of @dfn{special characters}, +which do not stand for themselves but instead +are interpreted in some special way. Here is a brief description +of regular expression syntax as used in @command{sed}. + +@table @code +@item @var{char} +A single ordinary character matches itself. + +@item * +@cindex @acronym{GNU} extensions, to basic regular expressions +Matches a sequence of zero or more instances of matches for the +preceding regular expression, which must be an ordinary character, a +special character preceded by @code{\}, a @code{.}, a grouped regexp +(see below), or a bracket expression. As a @acronym{GNU} extension, a +postfixed regular expression can also be followed by @code{*}; for +example, @code{a**} is equivalent to @code{a*}. @acronym{POSIX} +1003.1-2001 says that @code{*} stands for itself when it appears at +the start of a regular expression or subexpression, but many +non-@acronym{GNU} implementations do not support this and portable +scripts should instead use @code{\*} in these contexts. + +@item \+ +@cindex @acronym{GNU} extensions, to basic regular expressions +As @code{*}, but matches one or more. It is a @acronym{GNU} extension.
+ +@item \? +@cindex @acronym{GNU} extensions, to basic regular expressions +As @code{*}, but only matches zero or one. It is a @acronym{GNU} extension. + +@item \@{@var{i}\@} +As @code{*}, but matches exactly @var{i} sequences (@var{i} is a +decimal integer; for portability, keep it between 0 and 255 +inclusive). + +@item \@{@var{i},@var{j}\@} +Matches between @var{i} and @var{j}, inclusive, sequences. + +@item \@{@var{i},\@} +Matches more than or equal to @var{i} sequences. + +@item \(@var{regexp}\) +Groups the inner @var{regexp} as a whole, this is used to: + +@itemize @bullet +@item +@cindex @acronym{GNU} extensions, to basic regular expressions +Apply postfix operators, like @code{\(abcd\)*}: +this will search for zero or more whole sequences +of @samp{abcd}, while @code{abcd*} would search +for @samp{abc} followed by zero or more occurrences +of @samp{d}. Note that support for @code{\(abcd\)*} is +required by @acronym{POSIX} 1003.1-2001, but many non-@acronym{GNU} +implementations do not support it and hence it is not universally +portable. + +@item +Use back references (see below). +@end itemize + +@item . +Matches any character, including newline. + +@item ^ +Matches the null string at beginning of the pattern space, i.e. what +appears after the circumflex must appear at the beginning of the +pattern space. + +In most scripts, pattern space is initialized to the content of each +line (@pxref{Execution Cycle, , How @code{sed} works}). So, it is a +useful simplification to think of @code{^#include} as matching only +lines where @samp{#include} is the first thing on line---if there are +spaces before, for example, the match fails. This simplification is +valid as long as the original content of pattern space is not modified, +for example with an @code{s} command. + +@code{^} acts as a special character only at the beginning of the +regular expression or subexpression (that is, after @code{\(} or +@code{\|}). 
Portable scripts should avoid @code{^} at the beginning of +a subexpression, though, as @acronym{POSIX} allows implementations that +treat @code{^} as an ordinary character in that context. + +@item $ +It is the same as @code{^}, but refers to end of pattern space. +@code{$} also acts as a special character only at the end +of the regular expression or subexpression (that is, before @code{\)} +or @code{\|}), and its use at the end of a subexpression is not +portable. + + +@item [@var{list}] +@itemx [^@var{list}] +Matches any single character in @var{list}: for example, +@code{[aeiou]} matches all vowels. A list may include +sequences like @code{@var{char1}-@var{char2}}, which +matches any character between (inclusive) @var{char1} +and @var{char2}. + +A leading @code{^} reverses the meaning of @var{list}, so that +it matches any single character @emph{not} in @var{list}. To include +@code{]} in the list, make it the first character (after +the @code{^} if needed), to include @code{-} in the list, +make it the first or last; to include @code{^} put +it after the first character. + +@cindex @code{POSIXLY_CORRECT} behavior, bracket expressions +The characters @code{$}, @code{*}, @code{.}, @code{[}, and @code{\} +are normally not special within @var{list}. For example, @code{[\*]} +matches either @samp{\} or @samp{*}, because the @code{\} is not +special here. However, strings like @code{[.ch.]}, @code{[=a=]}, and +@code{[:space:]} are special within @var{list} and represent collating +symbols, equivalence classes, and character classes, respectively, and +@code{[} is therefore special within @var{list} when it is followed by +@code{.}, @code{=}, or @code{:}. Also, when not in +@env{POSIXLY_CORRECT} mode, special escapes like @code{\n} and +@code{\t} are recognized within @var{list}. @xref{Escapes}. + +@item @var{regexp1}\|@var{regexp2} +@cindex @acronym{GNU} extensions, to basic regular expressions +Matches either @var{regexp1} or @var{regexp2}. 
Use +parentheses to use complex alternative regular expressions. +The matching process tries each alternative in turn, from +left to right, and the first one that succeeds is used. +It is a @acronym{GNU} extension. + +@item @var{regexp1}@var{regexp2} +Matches the concatenation of @var{regexp1} and @var{regexp2}. +Concatenation binds more tightly than @code{\|}, @code{^}, and +@code{$}, but less tightly than the other regular expression +operators. + +@item \@var{digit} +Matches the @var{digit}-th @code{\(@dots{}\)} parenthesized +subexpression in the regular expression. This is called a @dfn{back +reference}. Subexpressions are implicitly numbered by counting +occurrences of @code{\(} left-to-right. + +@item \n +Matches the newline character. + +@item \@var{char} +Matches @var{char}, where @var{char} is one of @code{$}, +@code{*}, @code{.}, @code{[}, @code{\}, or @code{^}. +Note that the only C-like +backslash sequences that you can portably assume to be +interpreted are @code{\n} and @code{\\}; in particular +@code{\t} is not portable, and matches a @samp{t} under most +implementations of @command{sed}, rather than a tab character. + +@end table + +@cindex Greedy regular expression matching +Note that the regular expression matcher is greedy, i.e., matches +are attempted from left to right and, if two or more matches are +possible starting at the same character, it selects the longest. + +@noindent +Examples: +@table @samp +@item abcdef +Matches @samp{abcdef}. + +@item a*b +Matches zero or more @samp{a}s followed by a single +@samp{b}. For example, @samp{b} or @samp{aaaaab}. + +@item a\?b +Matches @samp{b} or @samp{ab}. + +@item a\+b\+ +Matches one or more @samp{a}s followed by one or more +@samp{b}s: @samp{ab} is the shortest possible match, but +other examples are @samp{aaaab} or @samp{abbbbb} or +@samp{aaaaaabbbbbbb}.
+ +@item .* +@itemx .\+ +These two both match all the characters in a string; +however, the first matches every string (including the empty +string), while the second matches only strings containing +at least one character. + +@item ^main.*(.*) +This matches a string starting with @samp{main}, +followed by an opening and closing +parenthesis. The @samp{n}, @samp{(} and @samp{)} need not +be adjacent. + +@item ^# +This matches a string beginning with @samp{#}. + +@item \\$ +This matches a string ending with a single backslash. The +regexp contains two backslashes for escaping. + +@item \$ +Instead, this matches a string consisting of a single dollar sign, +because it is escaped. + +@item [a-zA-Z0-9] +In the C locale, this matches any @acronym{ASCII} letters or digits. + +@item [^ @kbd{tab}]\+ +(Here @kbd{tab} stands for a single tab character.) +This matches a string of one or more +characters, none of which is a space or a tab. +Usually this means a word. + +@item ^\(.*\)\n\1$ +This matches a string consisting of two equal substrings separated by +a newline. + +@item .\@{9\@}A$ +This matches nine characters followed by an @samp{A}. + +@item ^.\@{15\@}A +This matches the start of a string that contains 16 characters, +the last of which is an @samp{A}. + +@end table + + + +@node Common Commands +@section Often-Used Commands + +If you use @command{sed} at all, you will quite likely want to know +these commands. + +@table @code +@item # +[No addresses allowed.] + +@findex # (comments) +@cindex Comments, in scripts +The @code{#} character begins a comment; +the comment continues until the next newline. + +@cindex Portability, comments +If you are concerned about portability, be aware that +some implementations of @command{sed} (which are not @sc{posix} +conformant) may only support a single one-line comment, +and then only when the very first character of the script is a @code{#}. 
+ +@findex -n, forcing from within a script +@cindex Caveat --- #n on first line +Warning: if the first two characters of the @command{sed} script +are @code{#n}, then the @option{-n} (no-autoprint) option is forced. +If you want to put a comment in the first line of your script +and that comment begins with the letter @samp{n} +and you do not want this behavior, +then be sure to either use a capital @samp{N}, +or place at least one space before the @samp{n}. + +@item q [@var{exit-code}] +This command only accepts a single address. + +@findex q (quit) command +@cindex @value{SSEDEXT}, returning an exit code +@cindex Quitting +Exit @command{sed} without processing any more commands or input. +Note that the current pattern space is printed if auto-print is +not disabled with the @option{-n} option. The ability to return +an exit code from the @command{sed} script is a @value{SSED} extension. + +@item d +@findex d (delete) command +@cindex Text, deleting +Delete the pattern space; +immediately start next cycle. + +@item p +@findex p (print) command +@cindex Text, printing +Print out the pattern space (to the standard output). +This command is usually only used in conjunction with the @option{-n} +command-line option. + +@item n +@findex n (next-line) command +@cindex Next input line, replace pattern space with +@cindex Read next input line +If auto-print is not disabled, print the pattern space, +then, regardless, replace the pattern space with the next line of input. +If there is no more input then @command{sed} exits without processing +any more commands. + +@item @{ @var{commands} @} +@findex @{@} command grouping +@cindex Grouping commands +@cindex Command groups +A group of commands may be enclosed between +@code{@{} and @code{@}} characters. +This is particularly useful when you want a group of commands +to be triggered by a single address (or address-range) match.
+ +@end table + +@node The "s" Command +@section The @code{s} Command + +The syntax of the @code{s} (as in substitute) command is +@samp{s/@var{regexp}/@var{replacement}/@var{flags}}. The @code{/} +characters may be uniformly replaced by any other single +character within any given @code{s} command. The @code{/} +character (or whatever other character is used in its stead) +can appear in the @var{regexp} or @var{replacement} +only if it is preceded by a @code{\} character. + +The @code{s} command is probably the most important in @command{sed} +and has a lot of different options. Its basic concept is simple: +the @code{s} command attempts to match the pattern +space against the supplied @var{regexp}; if the match is +successful, then that portion of the pattern +space which was matched is replaced with @var{replacement}. + +@cindex Backreferences, in regular expressions +@cindex Parenthesized substrings +The @var{replacement} can contain @code{\@var{n}} (@var{n} being +a number from 1 to 9, inclusive) references, which refer to +the portion of the match which is contained between the @var{n}th +@code{\(} and its matching @code{\)}. +Also, the @var{replacement} can contain unescaped @code{&} +characters which reference the whole matched portion +of the pattern space. +@cindex @value{SSEDEXT}, case modifiers in @code{s} commands +Finally, as a @value{SSED} extension, you can include a +special sequence made of a backslash and one of the letters +@code{L}, @code{l}, @code{U}, @code{u}, or @code{E}. +The meaning is as follows: + +@table @code +@item \L +Turn the replacement +to lowercase until a @code{\U} or @code{\E} is found, + +@item \l +Turn the +next character to lowercase, + +@item \U +Turn the replacement to uppercase +until a @code{\L} or @code{\E} is found, + +@item \u +Turn the next character +to uppercase, + +@item \E +Stop case conversion started by @code{\L} or @code{\U}. 
+@end table + +When the @code{g} flag is being used, case conversion does not +propagate from one occurrence of the regular expression to +another. For example, when the following command is executed +with @samp{a-b-} in pattern space: +@example +s/\(b\?\)-/x\u\1/g +@end example + +@noindent +the output is @samp{axxB}. When replacing the first @samp{-}, +the @samp{\u} sequence only affects the empty replacement of +@samp{\1}. It does not affect the @code{x} character that is +added to pattern space when replacing @code{b-} with @code{xB}. + +On the other hand, @code{\l} and @code{\u} do affect the remainder +of the replacement text if they are followed by an empty substitution. +With @samp{a-b-} in pattern space, the following command: +@example +s/\(b\?\)-/\u\1x/g +@end example + +@noindent +will replace @samp{-} with @samp{X} (uppercase) and @samp{b-} with +@samp{Bx}. If this behavior is undesirable, you can prevent it by +adding a @samp{\E} sequence---after @samp{\1} in this case. + +To include a literal @code{\}, @code{&}, or newline in the final +replacement, be sure to precede the desired @code{\}, @code{&}, +or newline in the @var{replacement} with a @code{\}. + +@findex s command, option flags +@cindex Substitution of text, options +The @code{s} command can be followed by zero or more of the +following @var{flags}: + +@table @code +@item g +@cindex Global substitution +@cindex Replacing all text matching regexp in a line +Apply the replacement to @emph{all} matches to the @var{regexp}, +not just the first. + +@item @var{number} +@cindex Replacing only @var{n}th match of regexp in a line +Only replace the @var{number}th match of the @var{regexp}. 
+ +@cindex @acronym{GNU} extensions, @code{g} and @var{number} modifier interaction in @code{s} command +@cindex Mixing @code{g} and @var{number} modifiers in the @code{s} command +Note: the @sc{posix} standard does not specify what should happen +when you mix the @code{g} and @var{number} modifiers, +and currently there is no widely agreed upon meaning +across @command{sed} implementations. +For @value{SSED}, the interaction is defined to be: +ignore matches before the @var{number}th, +and then match and replace all matches from +the @var{number}th on. + +@item p +@cindex Text, printing after substitution +If the substitution was made, then print the new pattern space. + +Note: when both the @code{p} and @code{e} options are specified, +the relative ordering of the two produces very different results. +In general, @code{ep} (evaluate then print) is what you want, +but operating the other way round can be useful for debugging. +For this reason, the current version of @value{SSED} interprets +specially the presence of @code{p} options both before and after +@code{e}, printing the pattern space before and after evaluation, +while in general flags for the @code{s} command show their +effect just once. This behavior, although documented, might +change in future versions. + +@item w @var{file-name} +@cindex Text, writing to a file after substitution +@cindex @value{SSEDEXT}, @file{/dev/stdout} file +@cindex @value{SSEDEXT}, @file{/dev/stderr} file +If the substitution was made, then write out the result to the named file. 
+As a @value{SSED} extension, two special values of @var{file-name} are +supported: @file{/dev/stderr}, which writes the result to the standard +error, and @file{/dev/stdout}, which writes to the standard +output.@footnote{This is equivalent to @code{p} unless the @option{-i} +option is being used.} + +@item e +@cindex Evaluate Bourne-shell commands, after substitution +@cindex Subprocesses +@cindex @value{SSEDEXT}, evaluating Bourne-shell commands +@cindex @value{SSEDEXT}, subprocesses +This command allows one to pipe input from a shell command +into pattern space. If a substitution was made, the command +that is found in pattern space is executed and pattern space +is replaced with its output. A trailing newline is suppressed; +results are undefined if the command to be executed contains +a @sc{nul} character. This is a @value{SSED} extension. + +@item I +@itemx i +@cindex @acronym{GNU} extensions, @code{I} modifier +@cindex Case-insensitive matching +@ifset PERL +@cindex Perl-style regular expressions, case-insensitive +@end ifset +The @code{I} modifier to regular-expression matching is a @acronym{GNU} +extension which makes @command{sed} match @var{regexp} in a +case-insensitive manner. + +@item M +@itemx m +@cindex @value{SSEDEXT}, @code{M} modifier +@ifset PERL +@cindex Perl-style regular expressions, multiline +@end ifset +The @code{M} modifier to regular-expression matching is a @value{SSED} +extension which directs @value{SSED} to match the regular expression +in @cite{multi-line} mode. The modifier causes @code{^} and @code{$} to +match respectively (in addition to the normal behavior) the empty string +after a newline, and the empty string before a newline. There are +special character sequences +@ifset PERL +(@code{\A} and @code{\Z} in Perl mode, @code{\`} and @code{\'} +in basic or extended regular expression modes) +@end ifset +@ifclear PERL +(@code{\`} and @code{\'}) +@end ifclear +which always match the beginning or the end of the buffer. 
+In addition, +@ifset PERL +just like in Perl mode without the @code{S} modifier, +@end ifset +the period character does not match a new-line character in +multi-line mode. + +@ifset PERL +@item S +@itemx s +@cindex @value{SSEDEXT}, @code{S} modifier +@cindex Perl-style regular expressions, single line +The @code{S} modifier to regular-expression matching is only valid +in Perl mode and specifies that the dot character (@code{.}) will +match the newline character too. @code{S} stands for @cite{single-line}. +@end ifset + +@ifset PERL +@item X +@itemx x +@cindex @value{SSEDEXT}, @code{X} modifier +@cindex Perl-style regular expressions, extended +The @code{X} modifier to regular-expression matching is also +valid in Perl mode only. If it is used, whitespace in the +pattern (other than in a character class) and +characters between a @kbd{#} outside a character class and the +next newline character are ignored. An escaping backslash +can be used to include a whitespace or @kbd{#} character as part +of the pattern. +@end ifset +@end table + + +@node Other Commands +@section Less Frequently-Used Commands + +Though perhaps less frequently used than those in the previous +section, some very small yet useful @command{sed} scripts can be built with +these commands. + +@table @code +@item y/@var{source-chars}/@var{dest-chars}/ +(The @code{/} characters may be uniformly replaced by +any other single character within any given @code{y} command.) + +@findex y (transliterate) command +@cindex Transliteration +Transliterate any characters in the pattern space which match +any of the @var{source-chars} with the corresponding character +in @var{dest-chars}. + +Instances of the @code{/} (or whatever other character is used in its stead), +@code{\}, or newlines can appear in the @var{source-chars} or @var{dest-chars} +lists, provided that each instance is escaped by a @code{\}. 
+The @var{source-chars} and @var{dest-chars} lists @emph{must} +contain the same number of characters (after de-escaping). + +@item a\ +@itemx @var{text} +@cindex @value{SSEDEXT}, two addresses supported by most commands +As a @acronym{GNU} extension, this command accepts two addresses. + +@findex a (append text lines) command +@cindex Appending text after a line +@cindex Text, appending +Queue the lines of text which follow this command +(each but the last ending with a @code{\}, +which are removed from the output) +to be output at the end of the current cycle, +or when the next input line is read. + +Escape sequences in @var{text} are processed, so you should +use @code{\\} in @var{text} to print a single backslash. + +As a @acronym{GNU} extension, if between the @code{a} and the newline there is +other than a whitespace-@code{\} sequence, then the text of this line, +starting at the first non-whitespace character after the @code{a}, +is taken as the first line of the @var{text} block. +(This enables a simplification in scripting a one-line add.) +This extension also works with the @code{i} and @code{c} commands. + +@item i\ +@itemx @var{text} +@cindex @value{SSEDEXT}, two addresses supported by most commands +As a @acronym{GNU} extension, this command accepts two addresses. + +@findex i (insert text lines) command +@cindex Inserting text before a line +@cindex Text, insertion +Immediately output the lines of text which follow this command +(each but the last ending with a @code{\}, +which are removed from the output). + +@item c\ +@itemx @var{text} +@findex c (change to text lines) command +@cindex Replacing selected lines with other text +Delete the lines matching the address or address-range, +and output the lines of text which follow this command +(each but the last ending with a @code{\}, +which are removed from the output) +in place of the last line +(or in place of each line, if no addresses were specified). 
+A new cycle is started after this command is done, +since the pattern space will have been deleted. + +@item = +@cindex @value{SSEDEXT}, two addresses supported by most commands +As a @acronym{GNU} extension, this command accepts two addresses. + +@findex = (print line number) command +@cindex Printing line number +@cindex Line number, printing +Print out the current input line number (with a trailing newline). + +@item l @var{n} +@findex l (list unambiguously) command +@cindex List pattern space +@cindex Printing text unambiguously +@cindex Line length, setting +@cindex @value{SSEDEXT}, setting line length +Print the pattern space in an unambiguous form: +non-printable characters (and the @code{\} character) +are printed in C-style escaped form; long lines are split, +with a trailing @code{\} character to indicate the split; +the end of each line is marked with a @code{$}. + +@var{n} specifies the desired line-wrap length; +a length of 0 (zero) means to never wrap long lines. If omitted, +the default as specified on the command line is used. The @var{n} +parameter is a @value{SSED} extension. + +@item r @var{filename} +@cindex @value{SSEDEXT}, two addresses supported by most commands +As a @acronym{GNU} extension, this command accepts two addresses. + +@findex r (read file) command +@cindex Read text from a file +@cindex @value{SSEDEXT}, @file{/dev/stdin} file +Queue the contents of @var{filename} to be read and +inserted into the output stream at the end of the current cycle, +or when the next input line is read. +Note that if @var{filename} cannot be read, it is treated as +if it were an empty file, without any error indication. + +As a @value{SSED} extension, the special value @file{/dev/stdin} +is supported for the file name, which reads the contents of the +standard input. 
+ +@item w @var{filename} +@findex w (write file) command +@cindex Write to a file +@cindex @value{SSEDEXT}, @file{/dev/stdout} file +@cindex @value{SSEDEXT}, @file{/dev/stderr} file +Write the pattern space to @var{filename}. +As a @value{SSED} extension, two special values of @var{filename} are +supported: @file{/dev/stderr}, which writes the result to the standard +error, and @file{/dev/stdout}, which writes to the standard +output.@footnote{This is equivalent to @code{p} unless the @option{-i} +option is being used.} + +The file will be created (or truncated) before the first input line is +read; all @code{w} commands (including instances of the @code{w} flag +on successful @code{s} commands) which refer to the same @var{filename} +are output without closing and reopening the file. + +@item D +@findex D (delete first line) command +@cindex Delete first line from pattern space +If pattern space contains no newline, start a normal new cycle as if +the @code{d} command was issued. Otherwise, delete text in the pattern +space up to the first newline, and restart cycle with the resultant +pattern space, without reading a new line of input. + +@item N +@findex N (append Next line) command +@cindex Next input line, append to pattern space +@cindex Append next input line to pattern space +Add a newline to the pattern space, +then append the next line of input to the pattern space. +If there is no more input then @command{sed} exits without processing +any more commands. + +@item P +@findex P (print first line) command +@cindex Print first line from pattern space +Print out the portion of the pattern space up to the first newline. + +@item h +@findex h (hold) command +@cindex Copy pattern space into hold space +@cindex Replace hold space with copy of pattern space +@cindex Hold space, copying pattern space into +Replace the contents of the hold space with the contents of the pattern space. 
+ +@item H +@findex H (append Hold) command +@cindex Append pattern space to hold space +@cindex Hold space, appending from pattern space +Append a newline to the contents of the hold space, +and then append the contents of the pattern space to that of the hold space. + +@item g +@findex g (get) command +@cindex Copy hold space into pattern space +@cindex Replace pattern space with copy of hold space +@cindex Hold space, copy into pattern space +Replace the contents of the pattern space with the contents of the hold space. + +@item G +@findex G (appending Get) command +@cindex Append hold space to pattern space +@cindex Hold space, appending to pattern space +Append a newline to the contents of the pattern space, +and then append the contents of the hold space to that of the pattern space. + +@item x +@findex x (eXchange) command +@cindex Exchange hold space with pattern space +@cindex Hold space, exchange with pattern space +Exchange the contents of the hold and pattern spaces. + +@end table + + +@node Programming Commands +@section Commands for @command{sed} gurus + +In most cases, use of these commands indicates that you are +probably better off programming in something like @command{awk} +or Perl. But occasionally one is committed to sticking +with @command{sed}, and these commands can enable one to write +quite convoluted scripts. + +@cindex Flow of control in scripts +@table @code +@item : @var{label} +[No addresses allowed.] + +@findex : (label) command +@cindex Labels, in scripts +Specify the location of @var{label} for branch commands. +In all other respects, a no-op. + +@item b @var{label} +@findex b (branch) command +@cindex Branch to a label, unconditionally +@cindex Goto, in scripts +Unconditionally branch to @var{label}. +The @var{label} may be omitted, in which case the next cycle is started. 
+ +@item t @var{label} +@findex t (test and branch if successful) command +@cindex Branch to a label, if @code{s///} succeeded +@cindex Conditional branch +Branch to @var{label} only if there has been a successful @code{s}ubstitution +since the last input line was read or conditional branch was taken. +The @var{label} may be omitted, in which case the next cycle is started. + +@end table + +@node Extended Commands +@section Commands Specific to @value{SSED} + +These commands are specific to @value{SSED}, so you +must use them with care and only when you are sure that +hindering portability is not evil. They allow you to check +for @value{SSED} extensions or to do tasks that are required +quite often, yet are unsupported by standard @command{sed}s. + +@table @code +@item e [@var{command}] +@findex e (evaluate) command +@cindex Evaluate Bourne-shell commands +@cindex Subprocesses +@cindex @value{SSEDEXT}, evaluating Bourne-shell commands +@cindex @value{SSEDEXT}, subprocesses +This command allows one to pipe input from a shell command +into pattern space. Without parameters, the @code{e} command +executes the command that is found in pattern space and +replaces the pattern space with the output; a trailing newline +is suppressed. + +If a parameter is specified, instead, the @code{e} command +interprets it as a command and sends its output to the output stream. +The command can run across multiple lines, all but the last ending with +a back-slash. + +In both cases, the results are undefined if the command to be +executed contains a @sc{nul} character. + +Note that, unlike the @code{r} command, the output of the command will +be printed immediately; the @code{r} command instead delays the output +to the end of the current cycle. + +@item F +@findex F (File name) command +@cindex Printing file name +@cindex File name, printing +Print out the file name of the current input file (with a trailing +newline). 
+ +@item L @var{n} +@findex L (fLow paragraphs) command +@cindex Reformat pattern space +@cindex Reformatting paragraphs +@cindex @value{SSEDEXT}, reformatting paragraphs +@cindex @value{SSEDEXT}, @code{L} command +This @value{SSED} extension fills and joins lines in pattern space +to produce output lines of (at most) @var{n} characters, like +@code{fmt} does; if @var{n} is omitted, the default as specified +on the command line is used. This command is considered a failed +experiment and unless there is enough demand (which seems unlikely) +will be removed in future versions. + +@ignore +Blank lines, spaces between words, and indentation are +preserved in the output; successive input lines with different +indentation are not joined; tabs are expanded to 8 columns. + +If the pattern space contains multiple lines, they are joined, but +since the pattern space usually contains a single line, the behavior +of a simple @code{L;d} script is the same as @samp{fmt -s} (i.e., +it does not join short lines to form longer ones). + +@var{n} specifies the desired line-wrap length; if omitted, +the default as specified on the command line is used. +@end ignore + +@item Q [@var{exit-code}] +This command only accepts a single address. + +@findex Q (silent Quit) command +@cindex @value{SSEDEXT}, quitting silently +@cindex @value{SSEDEXT}, returning an exit code +@cindex Quitting +This command is the same as @code{q}, but will not print the +contents of pattern space. Like @code{q}, it provides the +ability to return an exit code to the caller. 
+ +This command can be useful because the only alternative ways +to accomplish this apparently trivial function are to use +the @option{-n} option (which can unnecessarily complicate +your script) or resorting to the following snippet, which +wastes time by reading the whole file without any visible effect: + +@example +:eat +$d @i{@r{Quit silently on the last line}} +N @i{@r{Read another line, silently}} +g @i{@r{Overwrite pattern space each time to save memory}} +b eat +@end example + +@item R @var{filename} +@findex R (read line) command +@cindex Read text from a file +@cindex @value{SSEDEXT}, reading a file a line at a time +@cindex @value{SSEDEXT}, @code{R} command +@cindex @value{SSEDEXT}, @file{/dev/stdin} file +Queue a line of @var{filename} to be read and +inserted into the output stream at the end of the current cycle, +or when the next input line is read. +Note that if @var{filename} cannot be read, or if its end is +reached, no line is appended, without any error indication. + +As with the @code{r} command, the special value @file{/dev/stdin} +is supported for the file name, which reads a line from the +standard input. + +@item T @var{label} +@findex T (test and branch if failed) command +@cindex @value{SSEDEXT}, branch if @code{s///} failed +@cindex Branch to a label, if @code{s///} failed +@cindex Conditional branch +Branch to @var{label} only if there have been no successful +@code{s}ubstitutions since the last input line was read or +conditional branch was taken. The @var{label} may be omitted, +in which case the next cycle is started. + +@item v @var{version} +@findex v (version) command +@cindex @value{SSEDEXT}, checking for their presence +@cindex Requiring @value{SSED} +This command does nothing, but makes @command{sed} fail if +@value{SSED} extensions are not supported, simply because other +versions of @command{sed} do not implement it. In addition, you +can specify the version of @command{sed} that your script +requires, such as @code{4.0.5}. 
The default is @code{4.0} +because that is the first version that implemented this command. + +This command enables all @value{SSEDEXT} even if +@env{POSIXLY_CORRECT} is set in the environment. + +@item W @var{filename} +@findex W (write first line) command +@cindex Write first line to a file +@cindex @value{SSEDEXT}, writing first line to a file +Write to the given filename the portion of the pattern space up to +the first newline. Everything said under the @code{w} command about +file handling holds here too. + +@item z +@findex z (Zap) command +@cindex @value{SSEDEXT}, emptying pattern space +@cindex Emptying pattern space +This command empties the content of pattern space. It is +usually the same as @samp{s/.*//}, but is more efficient +and works in the presence of invalid multibyte sequences +in the input stream. @sc{posix} mandates that such sequences +are @emph{not} matched by @samp{.}, so that there is no portable +way to clear @command{sed}'s buffers in the middle of the +script in most multibyte locales (including UTF-8 locales). +@end table + +@node Escapes +@section @acronym{GNU} Extensions for Escapes in Regular Expressions + +@cindex @acronym{GNU} extensions, special escapes +Until this chapter, we have only encountered escapes of the form +@samp{\^}, which tell @command{sed} not to interpret the circumflex +as a special character, but rather to take it literally. For +example, @samp{\*} matches a single asterisk rather than zero +or more backslashes. + +@cindex @code{POSIXLY_CORRECT} behavior, escapes +This chapter introduces another kind of escape@footnote{All +the escapes introduced here are @acronym{GNU} +extensions, with the exception of @code{\n}. In basic regular +expression mode, setting @code{POSIXLY_CORRECT} disables them inside +bracket expressions.}---that +is, escapes that are applied to a character or sequence of characters +that ordinarily are taken literally, and that @command{sed} replaces +with a special character. 
This provides a way +of encoding non-printable characters in patterns in a visible manner. +There is no restriction on the appearance of non-printing characters +in a @command{sed} script but when a script is being prepared in the +shell or by text editing, it is usually easier to use one of +the following escape sequences than the binary character it +represents: + +The list of these escapes is: + +@table @code +@item \a +Produces or matches a @sc{bel} character, that is an ``alert'' (@sc{ascii} 7). + +@item \f +Produces or matches a form feed (@sc{ascii} 12). + +@item \n +Produces or matches a newline (@sc{ascii} 10). + +@item \r +Produces or matches a carriage return (@sc{ascii} 13). + +@item \t +Produces or matches a horizontal tab (@sc{ascii} 9). + +@item \v +Produces or matches a so called ``vertical tab'' (@sc{ascii} 11). + +@item \c@var{x} +Produces or matches @kbd{@sc{Control}-@var{x}}, where @var{x} is +any character. The precise effect of @samp{\c@var{x}} is as follows: +if @var{x} is a lower case letter, it is converted to upper case. +Then bit 6 of the character (hex 40) is inverted. Thus @samp{\cz} becomes +hex 1A, but @samp{\c@{} becomes hex 3B, while @samp{\c;} becomes hex 7B. + +@item \d@var{xxx} +Produces or matches a character whose decimal @sc{ascii} value is @var{xxx}. + +@item \o@var{xxx} +@ifset PERL +@item \@var{xxx} +@end ifset +Produces or matches a character whose octal @sc{ascii} value is @var{xxx}. +@ifset PERL +The syntax without the @code{o} is active in Perl mode, while the one +with the @code{o} is active in the normal or extended @sc{posix} regular +expression modes. +@end ifset + +@item \x@var{xx} +Produces or matches a character whose hexadecimal @sc{ascii} value is @var{xx}. +@end table + +@samp{\b} (backspace) was omitted because of the conflict with +the existing ``word boundary'' meaning. 
+ +Other escapes match a particular character class and are valid only in +regular expressions: + +@table @code +@item \w +Matches any ``word'' character. A ``word'' character is any +letter or digit or the underscore character. + +@item \W +Matches any ``non-word'' character. + +@item \b +Matches a word boundary; that is, it matches if the character +to the left is a ``word'' character and the character to the +right is a ``non-word'' character, or vice-versa. + +@item \B +Matches everywhere but on a word boundary; that is, it matches +if the character to the left and the character to the right +are either both ``word'' characters or both ``non-word'' +characters. + +@item \` +Matches only at the start of pattern space. This is different +from @code{^} in multi-line mode. + +@item \' +Matches only at the end of pattern space. This is different +from @code{$} in multi-line mode. + +@ifset PERL +@item \G +Matches only at the start of pattern space or, when doing a global +substitution using the @code{s///g} command and option, at +the end-of-match position of the prior match. For example, +@samp{s/\Ga/Z/g} will change an initial run of @code{a}s to +a run of @code{Z}s. +@end ifset +@end table + +@node Examples +@chapter Some Sample Scripts + +Here are some @command{sed} scripts to guide you in the art of mastering +@command{sed}. 
+ +@menu +Some exotic examples: +* Centering lines:: +* Increment a number:: +* Rename files to lower case:: +* Print bash environment:: +* Reverse chars of lines:: + +Emulating standard utilities: +* tac:: Reverse lines of files +* cat -n:: Numbering lines +* cat -b:: Numbering non-blank lines +* wc -c:: Counting chars +* wc -w:: Counting words +* wc -l:: Counting lines +* head:: Printing the first lines +* tail:: Printing the last lines +* uniq:: Make duplicate lines unique +* uniq -d:: Print duplicated lines of input +* uniq -u:: Remove all duplicated lines +* cat -s:: Squeezing blank lines +@end menu + +@node Centering lines +@section Centering Lines + +This script centers all lines of a file on an 80-column width. +To change that width, the number in @code{\@{@dots{}\@}} must be +replaced, and the number of added spaces also must be changed. + +Note how the buffer commands are used to separate parts in +the regular expressions to be matched---this is a common +technique. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -f + +# Put 80 spaces in the buffer +1 @{ + x + s/^$/ / + s/^.*$/&&&&&&&&/ + x +@} + +# del leading and trailing spaces +y/@kbd{tab}/ / +s/^ *// +s/ *$// + +# add a newline and 80 spaces to end of line +G + +# keep first 81 chars (80 + a newline) +s/^\(.\@{81\@}\).*$/\1/ + +# \2 matches half of the spaces, which are moved to the beginning +s/^\(.*\)\n\(.*\)\2/\2\1/ +@end example +@c end--------------------------------------------- + +@node Increment a number +@section Increment a Number + +This script is one of a few that demonstrate how to do arithmetic +in @command{sed}. This is indeed possible,@footnote{@command{sed} guru Greg +Ubben wrote an implementation of the @command{dc} @sc{rpn} calculator! +It is distributed together with sed.} but must be done manually. + +To increment one number you just add 1 to the last digit, replacing +it by the following digit. 
There is one exception: when the digit +is a nine the previous digits must be also incremented until you +don't have a nine. + +This solution by Bruno Haible is very clever and smart because +it uses a single buffer; if you don't have this limitation, the +algorithm used in @ref{cat -n, Numbering lines}, is faster. +It works by replacing trailing nines with an underscore, then +using multiple @code{s} commands to increment the last digit, +and then again substituting underscores with zeros. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -f + +/[^0-9]/ d + +# replace all trailing 9s by _ (any other character except digits, could +# be used) +:d +s/9\(_*\)$/_\1/ +td + +# incr last digit only. The first line adds a most-significant +# digit of 1 if we have to add a digit. + +s/^\(_*\)$/1\1/; tn +s/8\(_*\)$/9\1/; tn +s/7\(_*\)$/8\1/; tn +s/6\(_*\)$/7\1/; tn +s/5\(_*\)$/6\1/; tn +s/4\(_*\)$/5\1/; tn +s/3\(_*\)$/4\1/; tn +s/2\(_*\)$/3\1/; tn +s/1\(_*\)$/2\1/; tn +s/0\(_*\)$/1\1/; tn + +:n +y/_/0/ +@end example +@c end--------------------------------------------- + +@node Rename files to lower case +@section Rename Files to Lower Case + +This is a pretty strange use of @command{sed}. We transform text, and +transform it to be shell commands, then just feed them to shell. +Don't worry, even worse hacks are done when using @command{sed}; I have +seen a script converting the output of @command{date} into a @command{bc} +program! + +The main body of this is the @command{sed} script, which remaps the name +from lower to upper (or vice-versa) and even checks out +if the remapped name is the same as the original name. +Note how the script is parameterized using shell +variables and proper quoting. + +@c start------------------------------------------- +@example +#! /bin/sh +# rename files to lower/upper case... +# +# usage: +# move-to-lower * +# move-to-upper * +# or +# move-to-lower -R . +# move-to-upper -R . 
+# + +help() +@{ + cat << eof +Usage: $0 [-n] [-r] [-h] files... + +-n do nothing, only see what would be done +-R recursive (use find) +-h this message +files files to remap to lower case + +Examples: + $0 -n * (see if everything is ok, then...) + $0 * + + $0 -R . + +eof +@} + +apply_cmd='sh' +finder='echo "$@@" | tr " " "\n"' +files_only= + +while : +do + case "$1" in + -n) apply_cmd='cat' ;; + -R) finder='find "$@@" -type f';; + -h) help ; exit 1 ;; + *) break ;; + esac + shift +done + +if [ -z "$1" ]; then + echo Usage: $0 [-h] [-n] [-r] files... + exit 1 +fi + +LOWER='abcdefghijklmnopqrstuvwxyz' +UPPER='ABCDEFGHIJKLMNOPQRSTUVWXYZ' + +case `basename $0` in + *upper*) TO=$UPPER; FROM=$LOWER ;; + *) FROM=$UPPER; TO=$LOWER ;; +esac + +eval $finder | sed -n ' + +# remove all trailing slashes +s/\/*$// + +# add ./ if there is no path, only a filename +/\//! s/^/.\// + +# save path+filename +h + +# remove path +s/.*\/// + +# do conversion only on filename +y/'$FROM'/'$TO'/ + +# now line contains original path+file, while +# hold space contains the new filename +x + +# add converted file name to line, which now contains +# path/file-name\nconverted-file-name +G + +# check if converted file name is equal to original file name, +# if it is, do not print anything +/^.*\/\(.*\)\n\1/b + +# escape special characters for the shell +s/["$`\\]/\\&/g + +# now, transform path/fromfile\n, into +# mv path/fromfile path/tofile and print it +s/^\(.*\/\)\(.*\)\n\(.*\)$/mv "\1\2" "\1\3"/p + +' | $apply_cmd +@end example +@c end--------------------------------------------- + +@node Print bash environment +@section Print @command{bash} Environment + +This script strips the definition of the shell functions +from the output of the @command{set} Bourne-shell command. 
+ +@c start------------------------------------------- +@example +#!/bin/sh + +set | sed -n ' +:x + +@ifinfo +# if no occurrence of "=()" print and load next line +@end ifinfo +@ifnotinfo +# if no occurrence of @samp{=()} print and load next line +@end ifnotinfo +/=()/! @{ p; b; @} +/ () $/! @{ p; b; @} + +# possible start of functions section +# save the line in case this is a var like FOO="() " +h + +# if the next line has a brace, we quit because +# nothing comes after functions +n +/^@{/ q + +# print the old line +x; p + +# work on the new line now +x; bx +' +@end example +@c end--------------------------------------------- + +@node Reverse chars of lines +@section Reverse Characters of Lines + +This script can be used to reverse the position of characters +in lines. The technique moves two characters at a time, hence +it is faster than more intuitive implementations. + +Note the @code{tx} command before the definition of the label. +This is often needed to reset the flag that is tested by +the @code{t} command. + +Imaginative readers will find uses for this script. An example +is reversing the output of @command{banner}.@footnote{This requires +another script to pad the output of banner; for example + +@example +#! /bin/sh + +banner -w $1 $2 $3 $4 | + sed -e :a -e '/^.\@{0,'$1'\@}$/ @{ s/$/ /; ba; @}' | + ~/sedscripts/reverseline.sed +@end example +} + +@c start------------------------------------------- +@example +#!/usr/bin/sed -f + +/../! b + +# Reverse a line. Begin embedding the line between two newlines +s/^.*$/\ +&\ +/ + +# Move first character at the end. The regexp matches until +# there are zero or one characters between the markers +tx +:x +s/\(\n.\)\(.*\)\(.\n\)/\3\2\1/ +tx + +# Remove the newline markers +s/\n//g +@end example +@c end--------------------------------------------- + +@node tac +@section Reverse Lines of Files + +This one begins a series of totally useless (yet interesting) +scripts emulating various Unix commands. 
This, in particular, +is a @command{tac} workalike. + +Note that on implementations other than @acronym{GNU} @command{sed} +@ifset PERL +and @value{SSED} +@end ifset +this script might easily overflow internal buffers. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf + +# reverse all lines of input, i.e. first line became last, ... + +# from the second line, the buffer (which contains all previous lines) +# is *appended* to current line, so, the order will be reversed +1! G + +# on the last line we're done -- print everything +$ p + +# store everything on the buffer again +h +@end example +@c end--------------------------------------------- + +@node cat -n +@section Numbering Lines + +This script replaces @samp{cat -n}; in fact it formats its output +exactly like @acronym{GNU} @command{cat} does. + +Of course this is completely useless and for two reasons: first, +because somebody else did it in C, second, because the following +Bourne-shell script could be used for the same purpose and would +be much faster: + +@c start------------------------------------------- +@example +#! /bin/sh +sed -e "=" $@@ | sed -e ' + s/^/ / + N + s/^ *\(......\)\n/\1 / +' +@end example +@c end--------------------------------------------- + +It uses @command{sed} to print the line number, then groups lines two +by two using @code{N}. Of course, this script does not teach as much as +the one presented below. + +The algorithm used for incrementing uses both buffers, so the line +is printed as soon as possible and then discarded. The number +is split so that changing digits go in a buffer and unchanged ones go +in the other; the changed digits are modified in a single step +(using a @code{y} command). The line number for the next line +is then composed and stored in the hold space, to be used in the +next iteration. 
+ +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf + +# Prime the pump on the first line +x +/^$/ s/^.*$/1/ + +# Add the correct line number before the pattern +G +h + +# Format it and print it +s/^/ / +s/^ *\(......\)\n/\1 /p + +# Get the line number from hold space; add a zero +# if we're going to add a digit on the next line +g +s/\n.*$// +/^9*$/ s/^/0/ + +# separate changing/unchanged digits with an x +s/.9*$/x&/ + +# keep changing digits in hold space +h +s/^.*x// +y/0123456789/1234567890/ +x + +# keep unchanged digits in pattern space +s/x.*$// + +# compose the new number, remove the newline implicitly added by G +G +s/\n// +h +@end example +@c end--------------------------------------------- + +@node cat -b +@section Numbering Non-blank Lines + +Emulating @samp{cat -b} is almost the same as @samp{cat -n}---we only +have to select which lines are to be numbered and which are not. + +The part that is common to this script and the previous one is +not commented to show how important it is to comment @command{sed} +scripts properly... + +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf + +/^$/ @{ + p + b +@} + +# Same as cat -n from now +x +/^$/ s/^.*$/1/ +G +h +s/^/ / +s/^ *\(......\)\n/\1 /p +x +s/\n.*$// +/^9*$/ s/^/0/ +s/.9*$/x&/ +h +s/^.*x// +y/0123456789/1234567890/ +x +s/x.*$// +G +s/\n// +h +@end example +@c end--------------------------------------------- + +@node wc -c +@section Counting Characters + +This script shows another way to do arithmetic with @command{sed}. +In this case we have to add possibly large numbers, so implementing +this by successive increments would not be feasible (and possibly +even more complicated to contrive than this script). + +The approach is to map numbers to letters, kind of an abacus +implemented with @command{sed}. 
@samp{a}s are units, @samp{b}s are +tens and so on: we simply add the number of characters +on the current line as units, and then propagate the carry +to tens, hundreds, and so on. + +As usual, running totals are kept in hold space. + +On the last line, we convert the abacus form back to decimal. +For the sake of variety, this is done with a loop rather than +with some 80 @code{s} commands@footnote{Some implementations +have a limit of 199 commands per script}: first we +convert units, removing @samp{a}s from the number; then we +rotate letters so that tens become @samp{a}s, and so on +until no more letters remain. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf + +# Add n+1 a's to hold space (+1 is for the newline) +s/./a/g +H +x +s/\n/a/ + +# Do the carry. The t's and b's are not necessary, +# but they do speed up the thing +t a +: a; s/aaaaaaaaaa/b/g; t b; b done +: b; s/bbbbbbbbbb/c/g; t c; b done +: c; s/cccccccccc/d/g; t d; b done +: d; s/dddddddddd/e/g; t e; b done +: e; s/eeeeeeeeee/f/g; t f; b done +: f; s/ffffffffff/g/g; t g; b done +: g; s/gggggggggg/h/g; t h; b done +: h; s/hhhhhhhhhh//g + +: done +$! @{ + h + b +@} + +# On the last line, convert back to decimal + +: loop +/a/! s/[b-h]*/&0/ +s/aaaaaaaaa/9/ +s/aaaaaaaa/8/ +s/aaaaaaa/7/ +s/aaaaaa/6/ +s/aaaaa/5/ +s/aaaa/4/ +s/aaa/3/ +s/aa/2/ +s/a/1/ + +: next +y/bcdefgh/abcdefg/ +/[a-h]/ b loop +p +@end example +@c end--------------------------------------------- + +@node wc -w +@section Counting Words + +This script is almost the same as the previous one, once each +of the words on the line is converted to a single @samp{a} +(in the previous script each letter was changed to an @samp{a}). + +It is interesting that real @command{wc} programs have optimized +loops for @samp{wc -c}, so they are much slower at counting +words rather than characters. 
This script's bottleneck, +instead, is arithmetic, and hence the word-counting one +is faster (it has to manage smaller numbers). + +Again, the common parts are not commented to show the importance +of commenting @command{sed} scripts. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf + +# Convert words to a's +s/[ @kbd{tab}][ @kbd{tab}]*/ /g +s/^/ / +s/ [^ ][^ ]*/a /g +s/ //g + +# Append them to hold space +H +x +s/\n// + +# From here on it is the same as in wc -c. +/aaaaaaaaaa/! bx; s/aaaaaaaaaa/b/g +/bbbbbbbbbb/! bx; s/bbbbbbbbbb/c/g +/cccccccccc/! bx; s/cccccccccc/d/g +/dddddddddd/! bx; s/dddddddddd/e/g +/eeeeeeeeee/! bx; s/eeeeeeeeee/f/g +/ffffffffff/! bx; s/ffffffffff/g/g +/gggggggggg/! bx; s/gggggggggg/h/g +s/hhhhhhhhhh//g +:x +$! @{ h; b; @} +:y +/a/! s/[b-h]*/&0/ +s/aaaaaaaaa/9/ +s/aaaaaaaa/8/ +s/aaaaaaa/7/ +s/aaaaaa/6/ +s/aaaaa/5/ +s/aaaa/4/ +s/aaa/3/ +s/aa/2/ +s/a/1/ +y/bcdefgh/abcdefg/ +/[a-h]/ by +p +@end example +@c end--------------------------------------------- + +@node wc -l +@section Counting Lines + +No strange things are done now, because @command{sed} gives us +@samp{wc -l} functionality for free!!! Look: + +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf +$= +@end example +@c end--------------------------------------------- + +@node head +@section Printing the First Lines + +This script is probably the simplest useful @command{sed} script. +It displays the first 10 lines of input; the number of displayed +lines is right before the @code{q} command. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -f +10q +@end example +@c end--------------------------------------------- + +@node tail +@section Printing the Last Lines + +Printing the last @var{n} lines rather than the first is more complex +but indeed possible. @var{n} is encoded in the second line, before +the bang character. 
+ +This script is similar to the @command{tac} script in that it keeps the +final output in the hold space and prints it at the end: + +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf + +1! @{; H; g; @} +1,10 !s/[^\n]*\n// +$p +h +@end example +@c end--------------------------------------------- + +Mainly, the script keeps a window of 10 lines and slides it +by adding a line and deleting the oldest (the substitution command +on the second line works like a @code{D} command but does not +restart the loop). + +The ``sliding window'' technique is a very powerful way to write +efficient and complex @command{sed} scripts, because commands like +@code{P} would require a lot of work if implemented manually. + +To introduce the technique, which is fully demonstrated in the +rest of this chapter and is based on the @code{N}, @code{P} +and @code{D} commands, here is an implementation of @command{tail} +using a simple ``sliding window.'' + +This looks complicated but in fact the working is the same as +the last script: after we have kicked in the appropriate number +of lines, however, we stop using the hold space to keep inter-line +state, and instead use @code{N} and @code{D} to slide pattern +space by one line: + +@c start------------------------------------------- +@example +#!/usr/bin/sed -f + +1h +2,10 @{; H; g; @} +$q +1,9d +N +D +@end example +@c end--------------------------------------------- + +Note how the first, second and fourth lines are inactive after +the first ten lines of input. After that, all the script does +is: exiting on the last line of input, appending the next input +line to pattern space, and removing the first line. + +@node uniq +@section Make Duplicate Lines Unique + +This is an example of the art of using the @code{N}, @code{P} +and @code{D} commands, probably the most difficult to master. 
+ +@c start------------------------------------------- +@example +#!/usr/bin/sed -f +h + +:b +# On the last line, print and exit +$b +N +/^\(.*\)\n\1$/ @{ + # The two lines are identical. Undo the effect of + # the N command. + g + bb +@} + +# If the @code{N} command had added the last line, print and exit +$b + +# The lines are different; print the first and go +# back working on the second. +P +D +@end example +@c end--------------------------------------------- + +As you can see, we maintain a 2-line window using @code{P} and @code{D}. +This technique is often used in advanced @command{sed} scripts. + +@node uniq -d +@section Print Duplicated Lines of Input + +This script prints only duplicated lines, like @samp{uniq -d}. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf + +$b +N +/^\(.*\)\n\1$/ @{ + # Print the first of the duplicated lines + s/.*\n// + p + + # Loop until we get a different line + :b + $b + N + /^\(.*\)\n\1$/ @{ + s/.*\n// + bb + @} +@} + +# The last line cannot be followed by duplicates +$b + +# Found a different one. Leave it alone in the pattern space +# and go back to the top, hunting its duplicates +D +@end example +@c end--------------------------------------------- + +@node uniq -u +@section Remove All Duplicated Lines + +This script prints only unique lines, like @samp{uniq -u}. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -f + +# Search for a duplicate line --- until that, print what you find. +$b +N +/^\(.*\)\n\1$/ ! @{ + P + D +@} + +:c +# Got two equal lines in pattern space. 
At the +# end of the file we simply exit +$d + +# Else, we keep reading lines with @code{N} until we +# find a different one +s/.*\n// +N +/^\(.*\)\n\1$/ @{ + bc +@} + +# Remove the last instance of the duplicate line +# and go back to the top +D +@end example +@c end--------------------------------------------- + +@node cat -s +@section Squeezing Blank Lines + +As a final example, here are three scripts, of increasing complexity +and speed, that implement the same function as @samp{cat -s}, that is +squeezing blank lines. + +The first leaves a blank line at the beginning and end if there are +some already. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -f + +# on empty lines, join with next +# Note there is a star in the regexp +:x +/^\n*$/ @{ +N +bx +@} + +# now, squeeze all '\n', this can be also done by: +# s/^\(\n\)*/\1/ +s/\n*/\ +/ +@end example +@c end--------------------------------------------- + +This one is a bit more complex and removes all empty lines +at the beginning. It does leave a single blank line at the end +if one was there. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -f + +# delete all leading empty lines +1,/^./@{ +/./!d +@} + +# on an empty line we remove it and all the following +# empty lines, but one +:x +/./!@{ +N +s/^\n$// +tx +@} +@end example +@c end--------------------------------------------- + +This removes leading and trailing blank lines. It is also the +fastest. Note that loops are completely done with @code{n} and +@code{b}, without relying on @command{sed} to restart the +script automatically at the end of a line. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf + +# delete all (leading) blanks +/./!d + +# get here: so there is a non empty +:x +# print it +p +# get next +n +# got chars? print it again, etc... 
+/./bx + +# no, don't have chars: got an empty line +:z +# get next, if last line we finish here so no trailing +# empty lines are written +n +# also empty? then ignore it, and get next... this will +# remove ALL empty lines +/./!bz + +# all empty lines were deleted/ignored, but we have a non empty. As +# what we want to do is to squeeze, insert a blank line artificially +i\ + +bx +@end example +@c end--------------------------------------------- + +@node Limitations +@chapter @value{SSED}'s Limitations and Non-limitations + +@cindex @acronym{GNU} extensions, unlimited line length +@cindex Portability, line length limitations +For those who want to write portable @command{sed} scripts, +be aware that some implementations have been known to +limit line lengths (for the pattern and hold spaces) +to be no more than 4000 bytes. +The @sc{posix} standard specifies that conforming @command{sed} +implementations shall support at least 8192 byte line lengths. +@value{SSED} has no built-in limit on line length; +as long as it can @code{malloc()} more (virtual) memory, +you can feed or construct lines as long as you like. + +However, recursion is used to handle subpatterns and indefinite +repetition. This means that the available stack space may limit +the size of the buffer that can be processed by certain patterns. + +@ifset PERL +There are some size limitations in the regular expression +matcher but it is hoped that they will never in practice +be relevant. The maximum length of a compiled pattern +is 65539 (sic) bytes. All values in repeating quantifiers +must be less than 65536. The maximum nesting depth of +all parenthesized subpatterns, including capturing and +non-capturing subpatterns@footnote{The +distinction is meaningful when referring to Perl-style +regular expressions.}, assertions, and other types of +subpattern, is 200. 
+ +Also, @value{SSED} recognizes the @sc{posix} syntax +@code{[.@var{ch}.]} and @code{[=@var{ch}=]} +where @var{ch} is a ``collating element'', but these +are not supported, and an error is given if they are +encountered. + +Here are a few distinctions between the real Perl-style +regular expressions and those that @option{-R} recognizes. + +@enumerate +@item +Lookahead assertions do not allow repeat quantifiers after them. +Perl permits them, but they do not mean what you +might think. For example, @samp{(?!a)@{3@}} does not assert that the +next three characters are not @samp{a}. It just asserts three times that the +next character is not @samp{a} --- a waste of time and nothing else. + +@item +Capturing subpatterns that occur inside negative lookahead +assertions are counted, but their entries are counted +as empty in the second half of an @code{s} command. +Perl sets its numerical variables from any such patterns +that are matched before the assertion fails to match +something (thereby succeeding), but only if the negative +lookahead assertion contains just one branch. + +@item +The following Perl escape sequences are not supported: +@samp{\l}, @samp{\u}, @samp{\L}, @samp{\U}, @samp{\E}, +@samp{\Q}. In fact these are implemented by Perl's general +string-handling and are not part of its pattern matching engine. + +@item +The Perl @samp{\G} assertion is not supported as it is not +relevant to single pattern matches. + +@item +Fairly obviously, @value{SSED} does not support the @samp{(?@{code@})} +and @samp{(?p@{code@})} constructions. However, there is some experimental +support for recursive patterns using the non-Perl item @samp{(?R)}. + +@item +There are at the time of writing some oddities in Perl +5.005_02 concerned with the settings of captured strings +when part of a pattern is repeated. 
For example, matching +@samp{aba} against the pattern @samp{/^(a(b)?)+$/} sets +@samp{$2}@footnote{@samp{$2} would be @samp{\2} in @value{SSED}.} +to the value @samp{b}, but matching @samp{aabbaa} +against @samp{/^(aa(bb)?)+$/} leaves @samp{$2} +unset. However, if the pattern is changed to +@samp{/^(aa(b(b))?)+$/} then @samp{$2} (and @samp{$3}) are set. +In Perl 5.004 @samp{$2} is set in both cases, and that is also +true of @value{SSED}. + +@item +Another as yet unresolved discrepancy is that in Perl +5.005_02 the pattern @samp{/^(a)?(?(1)a|b)+$/} matches +the string @samp{a}, whereas in @value{SSED} it does not. +However, in both Perl and @value{SSED} @samp{/^(a)?a/} matched +against @samp{a} leaves @samp{$1} unset. +@end enumerate +@end ifset + +@node Other Resources +@chapter Other Resources for Learning About @command{sed} + +@cindex Additional reading about @command{sed} +In addition to several books that have been written about @command{sed} +(either specifically or as chapters in books which discuss +shell programming), one can find out more about @command{sed} +(including suggestions of a few books) from the FAQ +for the @code{sed-users} mailing list, available from: +@display +@uref{http://sed.sourceforge.net/sedfaq.html} +@end display + +Also of interest are +@uref{http://www.student.northpark.edu/pemente/sed/index.htm} +and @uref{http://sed.sf.net/grabbag}, +which include @command{sed} tutorials and other @command{sed}-related goodies. + +The @code{sed-users} mailing list itself is maintained by Sven Guckes. +To subscribe, visit @uref{http://groups.yahoo.com} and search +for the @code{sed-users} mailing list. + +@node Reporting Bugs +@chapter Reporting Bugs + +@cindex Bugs, reporting +Email bug reports to @email{bug-sed@@gnu.org}. +Also, please include the output of @samp{sed --version} in the body +of your report if at all possible. 
+ +Please do not send a bug report like this: + +@example +@i{@i{@r{while building frobme-1.3.4}}} +$ configure +@error{} sed: file sedscr line 1: Unknown option to 's' +@end example + +If @value{SSED} doesn't configure your favorite package, take a +few extra minutes to identify the specific problem and make a stand-alone +test case. Unlike other programs such as C compilers, making such test +cases for @command{sed} is quite simple. + +A stand-alone test case includes all the data necessary to perform the +test, and the specific invocation of @command{sed} that causes the problem. +The smaller a stand-alone test case is, the better. A test case should +not involve something as far removed from @command{sed} as ``try to configure +frobme-1.3.4''. Yes, that is in principle enough information to look +for the bug, but that is not a very practical prospect. + +Here are a few commonly reported bugs that are not bugs. + +@table @asis +@item @code{N} command on the last line +@cindex Portability, @code{N} command on the last line +@cindex Non-bugs, @code{N} command on the last line + +Most versions of @command{sed} exit without printing anything when +the @command{N} command is issued on the last line of a file. +@value{SSED} prints pattern space before exiting unless of course +the @command{-n} command switch has been specified. This choice is +by design. + +For example, the behavior of +@example +sed N foo bar +@end example +@noindent +would depend on whether foo has an even or an odd number of +lines@footnote{which is the actual ``bug'' that prompted the +change in behavior}. 
Or, when writing a script to read the +next few lines following a pattern match, traditional +implementations of @code{sed} would force you to write +something like +@example +/foo/@{ $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N @} +@end example +@noindent +instead of just +@example +/foo/@{ N;N;N;N;N;N;N;N;N; @} +@end example + +@cindex @code{POSIXLY_CORRECT} behavior, @code{N} command +In any case, the simplest workaround is to use @code{$d;N} in +scripts that rely on the traditional behavior, or to set +the @code{POSIXLY_CORRECT} variable to a non-empty value. + +@item Regex syntax clashes (problems with backslashes) +@cindex @acronym{GNU} extensions, to basic regular expressions +@cindex Non-bugs, regex syntax clashes +@command{sed} uses the @sc{posix} basic regular expression syntax. According to +the standard, the meaning of some escape sequences is undefined in +this syntax; notable in the case of @command{sed} are @code{\|}, +@code{\+}, @code{\?}, @code{\`}, @code{\'}, @code{\<}, +@code{\>}, @code{\b}, @code{\B}, @code{\w}, and @code{\W}. + +As in all @acronym{GNU} programs that use @sc{posix} basic regular +expressions, @command{sed} interprets these escape sequences as special +characters. So, @code{x\+} matches one or more occurrences of @samp{x}. +@code{abc\|def} matches either @samp{abc} or @samp{def}. + +This syntax may cause problems when running scripts written for other +@command{sed}s. Some @command{sed} programs have been written with the +assumption that @code{\|} and @code{\+} match the literal characters +@code{|} and @code{+}. Such scripts must be modified by removing the +spurious backslashes if they are to be used with modern implementations +of @command{sed}, like +@ifset PERL +@value{SSED} or +@end ifset +@acronym{GNU} @command{sed}. + +On the other hand, some scripts use s|abc\|def||g to remove occurrences +of @emph{either} @code{abc} or @code{def}. 
While this worked until +@command{sed} 4.0.x, newer versions interpret this as removing the +string @code{abc|def}. This is again undefined behavior according to +@acronym{POSIX}, and this interpretation is arguably more robust: older +@command{sed}s, for example, required that the regex matcher parsed +@code{\/} as @code{/} in the common case of escaping a slash, which is +again undefined behavior; the new behavior avoids this, and this is good +because the regex matcher is only partially under our control. + +@cindex @acronym{GNU} extensions, special escapes +In addition, this version of @command{sed} supports several escape characters +(some of which are multi-character) to insert non-printable characters +in scripts (@code{\a}, @code{\c}, @code{\d}, @code{\o}, @code{\r}, +@code{\t}, @code{\v}, @code{\x}). These can cause similar problems +with scripts written for other @command{sed}s. + +@item @option{-i} clobbers read-only files +@cindex In-place editing +@cindex @value{SSEDEXT}, in-place editing +@cindex Non-bugs, in-place editing + +In short, @samp{sed -i} will let you delete the contents of +a read-only file, and in general the @option{-i} option +(@pxref{Invoking sed, , Invocation}) lets you clobber +protected files. This is not a bug, but rather a consequence +of how the Unix filesystem works. + +The permissions on a file say what can happen to the data +in that file, while the permissions on a directory say what can +happen to the list of files in that directory. @samp{sed -i} +will not ever open for writing a file that is already on disk. +Rather, it will work on a temporary file that is finally renamed +to the original name: if you rename or delete files, you're actually +modifying the contents of the directory, so the operation depends on +the permissions of the directory, not of the file. 
For this same +reason, @command{sed} does not let you use @option{-i} on a writeable file +in a read-only directory, and will break hard or symbolic links when +@option{-i} is used on such a file. + +@item @code{0a} does not work (gives an error) +@cindex @code{0} address +@cindex @acronym{GNU} extensions, @code{0} address +@cindex Non-bugs, @code{0} address + +There is no line 0. 0 is a special address that is only used to treat +addresses like @code{0,/@var{RE}/} as active when the script starts: if +you write @code{1,/abc/d} and the first line includes the word @samp{abc}, +then that match would be ignored because address ranges must span at least +two lines (barring the end of the file); but what you probably wanted is +to delete every line up to the first one including @samp{abc}, and this +is obtained with @code{0,/abc/d}. + +@ifclear PERL +@item @code{[a-z]} is case insensitive +@cindex Non-bugs, localization-related + +You are encountering problems with locales. POSIX mandates that @code{[a-z]} +uses the current locale's collation order -- in C parlance, that means using +@code{strcoll(3)} instead of @code{strcmp(3)}. Some locales have a +case-insensitive collation order, others don't. + +Another problem is that @code{[a-z]} tries to use collation symbols. +This only happens if you are on the @acronym{GNU} system, using +@acronym{GNU} libc's regular expression matcher instead of compiling the +one supplied with @acronym{GNU} sed. In a Danish locale, for example, +the regular expression @code{^[a-z]$} matches the string @samp{aa}, +because this is a single collating symbol that comes after @samp{a} +and before @samp{b}; @samp{ll} behaves similarly in Spanish +locales, or @samp{ij} in Dutch locales. + +To work around these problems, which may cause bugs in shell scripts, set +the @env{LC_COLLATE} and @env{LC_CTYPE} environment variables to @samp{C}. 
+ +@item @code{s/.*//} does not clear pattern space +@cindex Non-bugs, localization-related +@cindex @value{SSEDEXT}, emptying pattern space +@cindex Emptying pattern space + +This happens if your input stream includes invalid multibyte +sequences. @sc{posix} mandates that such sequences +are @emph{not} matched by @samp{.}, so that @samp{s/.*//} will not clear +pattern space as you would expect. In fact, there is no way to clear +sed's buffers in the middle of the script in most multibyte locales +(including UTF-8 locales). For this reason, @value{SSED} provides a `z' +command (for `zap') as an extension. + +To work around these problems, which may cause bugs in shell scripts, set +the @env{LC_COLLATE} and @env{LC_CTYPE} environment variables to @samp{C}. +@end ifclear +@end table + + +@node Extended regexps +@appendix Extended regular expressions +@cindex Extended regular expressions, syntax + +The only difference between basic and extended regular expressions is in +the behavior of a few characters: @samp{?}, @samp{+}, parentheses, +braces (@samp{@{@}}), and @samp{|}. While basic regular expressions +require these to be escaped if you want them to behave as special +characters, when using extended regular expressions you must escape +them if you want them @emph{to match a literal character}. @samp{|} +is special here because @samp{\|} is a GNU extension -- standard +basic regular expressions do not provide its functionality. + +@noindent +Examples: +@table @code +@item abc? +becomes @samp{abc\?} when using extended regular expressions. It matches +the literal string @samp{abc?}. + +@item c\+ +becomes @samp{c+} when using extended regular expressions. It matches +one or more @samp{c}s. + +@item a\@{3,\@} +becomes @samp{a@{3,@}} when using extended regular expressions. It matches +three or more @samp{a}s. + +@item \(abc\)\@{2,3\@} +becomes @samp{(abc)@{2,3@}} when using extended regular expressions. It +matches either @samp{abcabc} or @samp{abcabcabc}. 
+ +@item \(abc*\)\1 +becomes @samp{(abc*)\1} when using extended regular expressions. +Backreferences must still be escaped when using extended regular +expressions. +@end table + +@ifset PERL +@node Perl regexps +@appendix Perl-style regular expressions +@cindex Perl-style regular expressions, syntax + +@emph{This part is taken from the @file{pcre.txt} file distributed together +with the free @sc{pcre} regular expression matcher; it was written by Philip Hazel.} + +Perl introduced several extensions to regular expressions, some +of them incompatible with the syntax of regular expressions +accepted by Emacs and other @acronym{GNU} tools (whose matcher was +based on the Emacs matcher). @value{SSED} implements +both kinds of extensions. + +@iftex +Summarizing, we have: + +@itemize @bullet +@item +A backslash can introduce several special sequences + +@item +The circumflex, dollar sign, and period characters behave specially +with regard to new lines + +@item +Strange uses of square brackets are parsed differently + +@item +You can toggle modifiers in the middle of a regular expression + +@item +You can specify that a subpattern does not count when numbering backreferences + +@item +@cindex Greedy regular expression matching +You can specify greedy or non-greedy matching + +@item +You can have more than ten back references + +@item +You can do complex look aheads and look behinds (in the spirit of +@code{\b}, but with subpatterns). + +@item +You can often improve performance by avoiding that @command{sed} wastes +time with backtracking + +@item +You can have if/then/else branches + +@item +You can do recursive matches, for example to look for unbalanced parentheses + +@item +You can have comments and non-significant whitespace, because things can +get complex... +@end itemize + +Most of these extensions are introduced by the special @code{(?} +sequence, which gives special meanings to parenthesized groups. 
+@end iftex +@menu +Other extensions can be roughly subdivided in two categories. +On one hand Perl introduces several more escaped sequences +(that is, sequences introduced by a backslash). On the other +hand, it specifies that if a question mark follows an open +parenthesis it should give a special meaning to the parenthesized +group. + +* Backslash:: Introduces special sequences +* Circumflex/dollar sign/period:: Behave specially with regard to new lines +* Square brackets:: Are a bit different in strange cases +* Options setting:: Toggle modifiers in the middle of a regexp +* Non-capturing subpatterns:: Are not counted when backreferencing +* Repetition:: Allows for non-greedy matching +* Backreferences:: Allows for more than 10 back references +* Assertions:: Allows for complex look ahead matches +* Non-backtracking subpatterns:: Often gives more performance +* Conditional subpatterns:: Allows if/then/else branches +* Recursive patterns:: For example to match parentheses +* Comments:: Because things can get complex... +@end menu + +@node Backslash +@appendixsec Backslash +@cindex Perl-style regular expressions, escaped sequences + +There are a few differences in the handling of backslashed +sequences in Perl mode. + +First of all, there are no @code{\o} and @code{\d} sequences. +@sc{ascii} values for characters can be specified in octal +with a @code{\@var{xxx}} sequence, where @var{xxx} is a +sequence of up to three octal digits. If the first digit +is a zero, the treatment of the sequence is straightforward; +just note that if the character that follows the escaped digit +is itself an octal digit, you have to supply three octal digits +for @var{xxx}. For example @code{\07} is a @sc{bel} character +rather than a @sc{nul} and a literal @code{7} (this sequence is +instead represented by @code{\0007}). + +@cindex Perl-style regular expressions, backreferences +The handling of a backslash followed by a digit other than 0 +is complicated. 
Outside a character class, @command{sed} reads it +and any following digits as a decimal number. If the number +is less than 10, or if there have been at least that many +previous capturing left parentheses in the expression, the +entire sequence is taken as a back reference. A description +of how this works is given later, following the discussion +of parenthesized subpatterns. + +Inside a character class, or if the decimal number is +greater than 9 and there have not been that many capturing +subpatterns, @command{sed} re-reads up to three octal digits following +the backslash, and generates a single byte from the +least significant 8 bits of the value. Any subsequent digits +stand for themselves. For example: + +@example +\040 @i{@r{is another way of writing a space}} +\40 @i{@r{is the same, provided there are fewer than 40}} + @i{@r{previous capturing subpatterns}} +\7 @i{@r{is always a back reference}} +\011 @i{@r{is always a tab}} +\11 @i{@r{might be a back reference, or another way of writing a tab}} +\0113 @i{@r{is a tab followed by the character @samp{3}}} +\113 @i{@r{is the character with octal code 113 (since there}} + @i{@r{can be no more than 99 back references)}} +\377 @i{@r{is a byte consisting entirely of 1 bits (@sc{ascii} 255)}} +\81 @i{@r{is either a back reference, or a binary zero}} + @i{@r{followed by the two characters @samp{81}}} +@end example + +Note that octal values of 100 or greater must not be introduced +by a leading zero, because no more than three octal +digits are ever read. Note that this applies only to the LHS +pattern; it is not possible yet to specify more than 9 backreferences +on the RHS of the `s' command. + +All the sequences that define a single byte value can be +used both inside and outside character classes. In addition, +inside a character class, the sequence @code{\b} is interpreted +as the backspace character (hex 08). Outside a character +class it has a different meaning (see below). 
+ +In addition, there are four additional escapes specifying +generic character classes (like @code{\w} and @code{\W} do): + +@cindex Perl-style regular expressions, character classes +@table @samp +@item \d +Matches any decimal digit + +@item \D +Matches any character that is not a decimal digit +@end table + +In Perl mode, these character type sequences can appear both inside and +outside character classes. Instead, in @sc{posix} mode these sequences +(as well as @code{\w} and @code{\W}) are treated as two literal characters +(a backslash and a letter) inside square brackets. + +Escaped sequences specifying assertions are also different in +Perl mode. An assertion specifies a condition that has to be met +at a particular point in a match, without consuming any +characters from the subject string. The use of subpatterns +for more complicated assertions is described below. The +backslashed assertions are + +@cindex Perl-style regular expressions, assertions +@table @samp +@item \b +Asserts that the point is at a word boundary. +A word boundary is a position in the subject string where +the current character and the previous character do not both +match @code{\w} or @code{\W} (i.e. one matches @code{\w} and +the other matches @code{\W}), or the start or end of the string +if the first or last character matches @code{\w}, respectively. + +@item \B +Asserts that the point is not at a word boundary. + +@item \A +Asserts the matcher is at the start of pattern space (independent +of multiline mode). + +@item \Z +Asserts the matcher is at the end of pattern space, +or at a newline before the end of pattern space (independent of +multiline mode) + +@item \z +Asserts the matcher is at the end of pattern space (independent +of multiline mode) +@end table + +These assertions may not appear in character classes (but +note that @code{\b} has a different meaning, namely the +backspace character, inside a character class). 
+Note that Perl mode does not directly support assertions +for the beginning and the end of a word; the @acronym{GNU} extensions +@code{\<} and @code{\>} achieve this purpose in @sc{posix} mode +instead. + +The @code{\A}, @code{\Z}, and @code{\z} assertions differ +from the traditional circumflex and dollar sign (described below) +in that they only ever match at the very start and end of the +subject string, whatever options are set; in particular @code{\A} +and @code{\z} are the same as the @acronym{GNU} extensions +@code{\`} and @code{\'} that are active in @sc{posix} mode. + +@node Circumflex/dollar sign/period +@appendixsec Circumflex, dollar sign, period +@cindex Perl-style regular expressions, newlines + +Outside a character class, in the default matching mode, the +circumflex character is an assertion which is true only if +the current matching point is at the start of the subject +string. Inside a character class, the circumflex has an entirely +different meaning (see below). + +The circumflex need not be the first character of the pattern if +a number of alternatives are involved, but it should be the +first thing in each alternative in which it appears if the +pattern is ever to match that branch. If all possible alternatives +start with a circumflex, that is, if the pattern is +constrained to match only at the start of the subject, it is +said to be an @dfn{anchored} pattern. (There are also other constructs +that can cause a pattern to be anchored.) + +A dollar sign is an assertion which is true only if the +current matching point is at the end of the subject string, +or immediately before a newline character that is the last +character in the string (by default). A dollar sign need not be the +last character of the pattern if a number of alternatives +are involved, but it should be the last item in any branch +in which it appears. A dollar sign has no special meaning in a +character class. 
+ +@cindex Perl-style regular expressions, multiline +The meanings of the circumflex and dollar sign characters are +changed if the @code{M} modifier option is used. When this is +the case, they match immediately after and immediately +before an internal @code{\n} character, respectively, in addition +to matching at the start and end of the subject string. For +example, the pattern @code{/^abc$/} matches the subject string +@samp{def\nabc} in multiline mode, but not otherwise. Consequently, +patterns that are anchored in single line mode +because all branches start with @code{^} are not anchored in +multiline mode. + +@cindex Perl-style regular expressions, multiline +Note that the sequences @code{\A}, @code{\Z}, and @code{\z} +can be used to match the start and end of the subject in both +modes, and if all branches of a pattern start with @code{\A} +it is always anchored, whether the @code{M} modifier is set or not. + +@cindex Perl-style regular expressions, single line +Outside a character class, a dot in the pattern matches any +one character in the subject, including a non-printing character, +but not (by default) newline. If the @code{S} modifier is used, +dots match newlines as well. Actually, the handling of +dot is entirely independent of the handling of circumflex +and dollar sign, the only relationship being that they both +involve newline characters. Dot has no special meaning in a +character class. + +@node Square brackets +@appendixsec Square brackets +@cindex Perl-style regular expressions, character classes + +An opening square bracket introduces a character class, terminated +by a closing square bracket. A closing square bracket on its own +is not special. If a closing square bracket is required as a +member of the class, it should be the first data character in +the class (after an initial circumflex, if present) or escaped with a backslash.
+ +A character class matches a single character in the subject; +the character must be in the set of characters defined by +the class, unless the first character in the class is a circumflex, +in which case the subject character must not be in +the set defined by the class. If a circumflex is actually +required as a member of the class, ensure it is not the +first character, or escape it with a backslash. + +For example, the character class [aeiou] matches any lower +case vowel, while [^aeiou] matches any character that is not +a lower case vowel. Note that a circumflex is just a convenient +notation for specifying the characters which are in +the class by enumerating those that are not. It is not an +assertion: it still consumes a character from the subject +string, and fails if the current pointer is at the end of +the string. + +@cindex Perl-style regular expressions, case-insensitive +When caseless matching is set, any letters in a class +represent both their upper case and lower case versions, so +for example, a caseless @code{[aeiou]} matches uppercase +and lowercase @samp{A}s, and a caseless @code{[^aeiou]} +does not match @samp{A}, whereas a case-sensitive version would. + +@cindex Perl-style regular expressions, single line +@cindex Perl-style regular expressions, multiline +The newline character is never treated in any special way in +character classes, whatever the setting of the @code{S} and +@code{M} options (modifiers) is. A class such as @code{[^a]} will +always match a newline. + +The minus (hyphen) character can be used to specify a range +of characters in a character class. For example, @code{[d-m]} +matches any letter between d and m, inclusive. If a minus +character is required in a class, it must be escaped with a +backslash or appear in a position where it cannot be interpreted +as indicating a range, typically as the first or last +character in the class.
+ +It is not possible to have the literal character @code{]} as the +end character of a range. A pattern such as @code{[W-]46]} is +interpreted as a class of two characters (@code{W} and @code{-}) +followed by a literal string @code{46]}, so it would match +@samp{W46]} or @samp{-46]}. However, if the @code{]} is escaped +with a backslash it is interpreted as the end of range, so +@code{[W-\]46]} is interpreted as a single class containing a +range followed by two separate characters. The octal or +hexadecimal representation of @code{]} can also be used to end a range. + +Ranges operate in @sc{ascii} collating sequence. They can also be +used for characters specified numerically, for example +@code{[\000-\037]}. If a range that includes letters is used when +caseless matching is set, it matches the letters in either +case. For example, a caseless @code{[W-c]} is equivalent to +@code{[][\^_`wxyzabc]}, matched caselessly, and if character +tables for the French locale are in use, @code{[\xc8-\xcb]} +matches accented E characters in both cases. + +Unlike in @sc{posix} mode, the character types @code{\d}, +@code{\D}, @code{\s}, @code{\S}, @code{\w}, and @code{\W} +may also appear in a character class, and add the characters +that they match to the class. For example, @code{[\dABCDEF]} matches any +hexadecimal digit. A circumflex can conveniently be used +with the upper case character types to specify a more restricted +set of characters than the matching lower case type. +For example, the class @code{[^\W_]} matches any letter or digit, +but not underscore. + +All non-alphameric characters other than @code{\}, @code{-}, +@code{^} (at the start) and the terminating @code{]} +are non-special in character classes, but it does no harm +if they are escaped. + +Perl 5.6 supports the @sc{posix} notation for character classes, which +uses names enclosed by @code{[:} and @code{:]} within the enclosing +square brackets, and @value{SSED} supports this notation as well. 
+For example, + +@example +[01[:alpha:]%] +@end example + +@noindent +matches @samp{0}, @samp{1}, any alphabetic character, or @samp{%}. +The supported class names are + +@table @code +@item alnum +Matches letters and digits + +@item alpha +Matches letters + +@item ascii +Matches character codes 0 - 127 + +@item cntrl +Matches control characters + +@item digit +Matches decimal digits (same as \d) + +@item graph +Matches printing characters, excluding space + +@item lower +Matches lower case letters + +@item print +Matches printing characters, including space + +@item punct +Matches printing characters, excluding letters and digits + +@item space +Matches white space (same as \s) + +@item upper +Matches upper case letters + +@item word +Matches ``word'' characters (same as \w) + +@item xdigit +Matches hexadecimal digits +@end table + +The names @code{ascii} and @code{word} are extensions valid only in +Perl mode. Another Perl extension is negation, which is +indicated by a circumflex character after the colon. For example, + +@example +[12[:^digit:]] +@end example + +@noindent +matches @samp{1}, @samp{2}, or any non-digit. + +@node Options setting +@appendixsec Options setting +@cindex Perl-style regular expressions, toggling options +@cindex Perl-style regular expressions, case-insensitive +@cindex Perl-style regular expressions, multiline +@cindex Perl-style regular expressions, single line +@cindex Perl-style regular expressions, extended + +The settings of the @code{I}, @code{M}, @code{S}, @code{X} +modifiers can be changed from within the pattern by +a sequence of Perl option letters enclosed between @code{(?} +and @code{)}. The option letters must be lowercase. + +For example, @code{(?im)} sets caseless, multiline matching. 
It is +also possible to unset these options by preceding the letter +with a hyphen; you can also have combined settings and unsettings: +@code{(?im-sx)} sets caseless and multiline matching, +while unsetting single line matching (for dots) and extended +whitespace interpretation. If a letter appears both before +and after the hyphen, the option is unset. + +The scope of these option changes depends on where in the +pattern the setting occurs. For settings that are outside +any subpattern (defined below), the effect is the same as if +the options were set or unset at the start of matching. The +following patterns all behave in exactly the same way: + +@example +(?i)abc +a(?i)bc +ab(?i)c +abc(?i) +@end example + +which in turn is the same as specifying the pattern abc with +the @code{I} modifier. In other words, ``top level'' settings +apply to the whole pattern (unless there are other +changes inside subpatterns). If there is more than one setting +of the same option at top level, the rightmost setting +is used. + +If an option change occurs inside a subpattern, the effect +is different. This is a change of behaviour in Perl 5.005. +An option change inside a subpattern affects only that part +of the subpattern @emph{that follows} it, so + +@example +(a(?i)b)c +@end example + +@noindent +matches abc and aBc and no other strings (assuming +case-sensitive matching is used). By this means, options can +be made to have different settings in different parts of the +pattern. Any changes made in one alternative do carry on +into subsequent branches within the same subpattern. For +example, + +@example +(a(?i)b|c) +@end example + +@noindent +matches @samp{ab}, @samp{aB}, @samp{c}, and @samp{C}, +even though when matching @samp{C} the first branch is +abandoned before the option setting. +This is because the effects of option settings happen at +compile time. There would be some very weird behaviour otherwise.
+ +@ignore +There are two PCRE-specific options PCRE_UNGREEDY and PCRE_EXTRA +that can be changed in the same way as the Perl-compatible options by +using the characters U and X respectively. The (?X) flag +setting is special in that it must always occur earlier in +the pattern than any of the additional features it turns on, +even when it is at top level. It is best put at the start. +@end ignore + + +@node Non-capturing subpatterns +@appendixsec Non-capturing subpatterns +@cindex Perl-style regular expressions, non-capturing subpatterns + +Marking part of a pattern as a subpattern does two things. +On one hand, it localizes a set of alternatives; on the other +hand, it sets up the subpattern as a capturing subpattern (as +defined above). The subpattern can be backreferenced and +referenced in the right side of @code{s} commands. + +For example, if the string @samp{the red king} is matched against +the pattern + +@example +the ((red|white) (king|queen)) +@end example + +@noindent +the captured substrings are @samp{red king}, @samp{red}, +and @samp{king}, and are numbered 1, 2, and 3. + +The fact that plain parentheses fulfil two functions is not +always helpful. There are often times when a grouping +subpattern is required without a capturing requirement. If an +opening parenthesis is followed by @code{?:}, the subpattern does +not do any capturing, and is not counted when computing the +number of any subsequent capturing subpatterns. For example, +if the string @samp{the white queen} is matched against the pattern + +@example +the ((?:red|white) (king|queen)) +@end example + +@noindent +the captured substrings are @samp{white queen} and @samp{queen}, +and are numbered 1 and 2. The maximum number of captured +substrings is 99, while the maximum number of all subpatterns, +both capturing and non-capturing, is 200. 
+ +As a convenient shorthand, if any option settings are +required at the start of a non-capturing subpattern, the +option letters may appear between the @code{?} and the +@code{:}. Thus the two patterns + +@example +(?i:saturday|sunday) +(?:(?i)saturday|sunday) +@end example + +@noindent +match exactly the same set of strings. Because alternative +branches are tried from left to right, and options are not +reset until the end of the subpattern is reached, an option +setting in one branch does affect subsequent branches, so +the above patterns match @samp{SUNDAY} as well as @samp{Saturday}. + + +@node Repetition +@appendixsec Repetition +@cindex Perl-style regular expressions, repetitions + +Repetition is specified by quantifiers, which can follow any +of the following items: + +@itemize @bullet +@item +a single character, possibly escaped + +@item +the @code{.} special character + +@item +a character class + +@item +a back reference (see next section) + +@item +a parenthesized subpattern (unless it is an assertion; @pxref{Assertions}) +@end itemize + +The general repetition quantifier specifies a minimum and +maximum number of permitted matches, by giving the two +numbers in curly brackets (braces), separated by a comma. +The numbers must be less than 65536, and the first must be +less than or equal to the second. For example: + +@example +z@{2,4@} +@end example + +@noindent +matches @samp{zz}, @samp{zzz}, or @samp{zzzz}. A closing brace on its own +is not a special character. If the second number is omitted, +but the comma is present, there is no upper limit; if the +second number and the comma are both omitted, the quantifier +specifies an exact number of required matches. Thus + +@example +[aeiou]@{3,@} +@end example + +@noindent +matches at least 3 successive vowels, but may match many +more, while + +@example +\d@{8@} +@end example + +@noindent +matches exactly 8 digits.
An opening curly bracket that +appears in a position where a quantifier is not allowed, or +one that does not match the syntax of a quantifier, is taken +as a literal character. For example, @{,6@} is not a quantifier, +but a literal string of four characters.@footnote{It +raises an error if @option{-R} is not used.} + +The quantifier @samp{@{0@}} is permitted, causing the expression to +behave as if the previous item and the quantifier were not +present. + +For convenience (and historical compatibility) the three +most common quantifiers have single-character abbreviations: + +@table @code +@item * +is equivalent to @{0,@} + +@item + +is equivalent to @{1,@} + +@item ? +is equivalent to @{0,1@} +@end table + +It is possible to construct infinite loops by following a +subpattern that can match no characters with a quantifier +that has no upper limit, for example: + +@example +(a?)* +@end example + +Earlier versions of Perl used to give an error at +compile time for such patterns. However, because there are +cases where this can be useful, such patterns are now +accepted, but if any repetition of the subpattern does in +fact match no characters, the loop is forcibly broken. + +@cindex Greedy regular expression matching +@cindex Perl-style regular expressions, stingy repetitions +By default, the quantifiers are @dfn{greedy} like in @sc{posix} +mode, that is, they match as much as possible (up to the maximum +number of permitted times), without causing the rest of the +pattern to fail. The classic example of where this gives problems +is in trying to match comments in C programs. These appear between +the sequences @code{/*} and @code{*/} and within the sequence, individual +@code{*} and @code{/} characters may appear. 
An attempt to match C +comments by applying the pattern + +@example +/\*.*\*/ +@end example + +@noindent +to the string + +@example +/* first comment */ not comment /* second comment */ +@end example + +@noindent + +fails, because it matches the entire string owing to the +greediness of the @code{.*} item. + +However, if a quantifier is followed by a question mark, it +ceases to be greedy, and instead matches the minimum number +of times possible, so the pattern @code{/\*.*?\*/} +does the right thing with the C comments. The meaning of the +various quantifiers is not otherwise changed, just the preferred +number of matches. Do not confuse this use of question +mark with its use as a quantifier in its own right. +Because it has two uses, it can sometimes appear doubled, as in + +@example +\d??\d +@end example + +which matches one digit by preference, but can match two if +that is the only way the rest of the pattern matches. + +Note that greediness does not matter when specifying addresses, +but can nevertheless be used to improve performance. + +@ignore +If the PCRE_UNGREEDY option is set (an option which is not +available in Perl), the quantifiers are not greedy by +default, but individual ones can be made greedy by following +them with a question mark. In other words, it inverts the +default behaviour. +@end ignore + +When a parenthesized subpattern is quantified with a minimum +repeat count that is greater than 1 or with a limited maximum, +more store is required for the compiled pattern, in +proportion to the size of the minimum or maximum. + +@cindex Perl-style regular expressions, single line +If a pattern starts with @code{.*} or @code{.@{0,@}} and the +@code{S} modifier is used, the pattern is implicitly anchored, +because whatever follows will be tried against every character +position in the subject string, so there is no point in +retrying the overall match at any position after the first. +PCRE treats such a pattern as though it were preceded by \A.
+ +When a capturing subpattern is repeated, the value captured +is the substring that matched the final iteration. For example, +after + +@example +(tweedle[dume]@{3@}\s*)+ +@end example + +@noindent +has matched @samp{tweedledum tweedledee} the value of the +captured substring is @samp{tweedledee}. However, if there are +nested capturing subpatterns, the corresponding captured +values may have been set in previous iterations. For example, +after + +@example +/(a|(b))+/ +@end example + +matches @samp{aba}, the value of the second captured substring is +@samp{b}. + +@node Backreferences +@appendixsec Backreferences +@cindex Perl-style regular expressions, backreferences + +Outside a character class, a backslash followed by a digit +greater than 0 (and possibly further digits) is a back +reference to a capturing subpattern earlier (i.e. to its +left) in the pattern, provided there have been that many +previous capturing left parentheses. + +However, if the decimal number following the backslash is +less than 10, it is always taken as a back reference, and +causes an error only if there are not that many capturing +left parentheses in the entire pattern. In other words, the +parentheses that are referenced need not be to the left of +the reference for numbers less than 10. @ref{Backslash} +for further details of the handling of digits following a backslash. + +A back reference matches whatever actually matched the capturing +subpattern in the current subject string, rather than +anything matching the subpattern itself. So the pattern + +@example +(sens|respons)e and \1ibility +@end example + +@noindent +matches @samp{sense and sensibility} and @samp{response and responsibility}, +but not @samp{sense and responsibility}. If caseful +matching is in force at the time of the back reference, the +case of letters is relevant. 
For example, + +@example +((?i)blah)\s+\1 +@end example + +@noindent +matches @samp{blah blah} and @samp{Blah Blah}, but not +@samp{BLAH blah}, even though the original capturing +subpattern is matched caselessly. + +There may be more than one back reference to the same subpattern. +Also, if a subpattern has not actually been used in a +particular match, any back references to it always fail. For +example, the pattern + +@example +(a|(bc))\2 +@end example + +@noindent +always fails if it starts to match @samp{a} rather than +@samp{bc}. Because there may be up to 99 back references, all +digits following the backslash are taken as part of a potential +back reference number; this is different from what happens +in @sc{posix} mode. If the pattern continues with a digit +character, some delimiter must be used to terminate the back +reference. If the @code{X} modifier option is set, this can be +whitespace. Otherwise an empty comment can be used, or the +following character can be expressed in hexadecimal or octal. +Note that this applies only to the LHS pattern; it is +not possible yet to specify more than 9 backreferences on the +RHS of the `s' command. + +A back reference that occurs inside the parentheses to which +it refers fails when the subpattern is first used, so, for +example, @code{(a\1)} never matches. However, such references +can be useful inside repeated subpatterns. For example, the +pattern + +@example +(a|b\1)+ +@end example + +@noindent +matches any number of @samp{a}s and also @samp{aba}, @samp{ababbaa}, +etc. At each iteration of the subpattern, the back reference matches +the character string corresponding to the previous iteration. In +order for this to work, the pattern must be such that the first +iteration does not need to match the back reference. This can be +done using alternation, as in the example above, or by a +quantifier with a minimum of zero. 
+ +@node Assertions +@appendixsec Assertions +@cindex Perl-style regular expressions, assertions +@cindex Perl-style regular expressions, asserting subpatterns + +An assertion is a test on the characters following or +preceding the current matching point that does not actually +consume any characters. The simple assertions coded as @code{\b}, +@code{\B}, @code{\A}, @code{\Z}, @code{\z}, @code{^} and @code{$} +are described above. More complicated assertions are coded as +subpatterns. There are two kinds: those that look ahead of the +current position in the subject string, and those that look behind it. + +@cindex Perl-style regular expressions, lookahead subpatterns +An assertion subpattern is matched in the normal way, except +that it does not cause the current matching position to be +changed. Lookahead assertions start with @code{(?=} for positive +assertions and @code{(?!} for negative assertions. For example, + +@example +\w+(?=;) +@end example + +@noindent +matches a word followed by a semicolon, but does not include +the semicolon in the match, and + +@example +foo(?!bar) +@end example + +@noindent +matches any occurrence of @samp{foo} that is not followed by +@samp{bar}. + +Note that the apparently similar pattern + +@example +(?!foo)bar +@end example + +@noindent +@cindex Perl-style regular expressions, lookbehind subpatterns +finds any occurrence of @samp{bar} even if it is preceded by +@samp{foo}, because the assertion @code{(?!foo)} is always true +when the next three characters are @samp{bar}. A lookbehind +assertion is needed to achieve this effect. +Lookbehind assertions start with @code{(?<=} for positive +assertions and @code{(?<!} for negative assertions. So, + +@example +(?<!foo)bar +@end example + +achieves the required effect of finding an occurrence of +@samp{bar} that is not preceded by @samp{foo}. The contents of a +lookbehind assertion are restricted +such that all the strings it matches must have a fixed +length. 
However, if there are several alternatives, they do +not all have to have the same fixed length. This is an extension +compared with Perl 5.005, which requires all branches to match +the same length of string. Thus + +@example +(?<=dogs|cats|) +@end example + +@noindent +is permitted, but the apparently equivalent regular expression + +@example +(?<!dogs?|cats?) +@end example + +@noindent +causes an error at compile time. Branches that match different +length strings are permitted only at the top level of +a lookbehind assertion: an assertion such as + +@example +(?<=ab(c|de)) +@end example + +@noindent +is not permitted, because its single top-level branch can +match two different lengths, but it is acceptable if rewritten +to use two top-level branches: + +@example +(?<=abc|abde) +@end example + +All this is required because lookbehind assertions simply +move the current position back by the alternative's fixed +width and then try to match. If there are +insufficient characters before the current position, the +match is deemed to fail. Lookbehinds, in conjunction with +non-backtracking subpatterns, can be particularly useful for +matching at the ends of strings; an example is given at the end +of the section on non-backtracking subpatterns. + +Several assertions (of any sort) may occur in succession. +For example, + +@example +(?<=\d@{3@})(?<!999)foo +@end example + +@noindent +matches @samp{foo} preceded by three digits that are not @samp{999}. +Notice that each of the assertions is applied independently +at the same point in the subject string. First there is a +check that the previous three characters are all digits, and +then there is a check that the same three characters are not +@samp{999}. This pattern does not match @samp{foo} preceded by six +characters, the first three of which are digits and the last three +of which are not @samp{999}. For example, it doesn't match +@samp{123abcfoo}.
A pattern to do that is + +@example +(?<=\d@{3@}...)(?<!999)foo +@end example + +@noindent +This time the first assertion looks at the preceding six +characters, checking that the first three are digits, and +then the second assertion checks that the preceding three +characters are not @samp{999}. Actually, assertions can be +nested in any combination, so one can write this as + +@example +(?<=\d@{3@}(?!999)...)foo +@end example + +or + +@example +(?<=\d@{3@}...(?<!999))foo +@end example + +@noindent +both of which might be considered more readable. + +Assertion subpatterns are not capturing subpatterns, and may +not be repeated, because it makes no sense to assert the +same thing several times. If any kind of assertion contains +capturing subpatterns within it, these are counted for the +purposes of numbering the capturing subpatterns in the whole +pattern. However, substring capturing is carried out only +for positive assertions, because it does not make sense for +negative assertions. + +Assertions count towards the maximum of 200 parenthesized +subpatterns. + +@node Non-backtracking subpatterns +@appendixsec Non-backtracking subpatterns +@cindex Perl-style regular expressions, non-backtracking subpatterns + +With both maximizing and minimizing repetition, failure of +what follows normally causes the repeated item to be evaluated +again to see if a different number of repeats allows the +rest of the pattern to match. Sometimes it is useful to +prevent this, either to change the nature of the match, or +to cause it to fail earlier than it otherwise might, when the +author of the pattern knows there is no point in carrying +on.
+ +Consider, for example, the pattern @code{\d+foo} when applied to +the subject line + +@example +123456bar +@end example + +After matching all 6 digits and then failing to match @samp{foo}, +the normal action of the matcher is to try again with only 5 +digits matching the @code{\d+} item, and then with 4, and so on, +before ultimately failing. Non-backtracking subpatterns +provide the means for specifying that once a portion of the +pattern has matched, it is not to be re-evaluated in this way, +so the matcher would give up immediately on failing to match +@samp{foo} the first time. The notation is another kind of special +parenthesis, starting with @code{(?>} as in this example: + +@example +(?>\d+)bar +@end example + +This kind of parenthesis ``locks up'' the part of the pattern +it contains once it has matched, and a failure further into +the pattern is prevented from backtracking into it. +Backtracking past it to previous items, however, works as +normal. + +Non-backtracking subpatterns are not capturing subpatterns. Simple +cases such as the above example can be thought of as a maximizing +repeat that must swallow everything it can. So, +while both @code{\d+} and @code{\d+?} are prepared to adjust the number of +digits they match in order to make the rest of the pattern +match, @code{(?>\d+)} can only match an entire sequence of digits. + +This construction can of course contain arbitrarily complicated +subpatterns, and it can be nested. + +@cindex Perl-style regular expressions, lookbehind subpatterns +Non-backtracking subpatterns can be used in conjunction with look-behind +assertions to specify efficient matching at the end +of the subject string. Consider a simple pattern such as + +@example +abcd$ +@end example + +@noindent +when applied to a long string which does not match. Because +matching proceeds from left to right, @command{sed} will look for +each @samp{a} in the subject and then see if what follows matches +the rest of the pattern. 
If the pattern is specified as + +@example +^.*abcd$ +@end example + +@noindent +the initial @code{.*} matches the entire string at first, but when +this fails (because there is no following @samp{a}), it backtracks +to match all but the last character, then all but the +last two characters, and so on. Once again the search for +@samp{a} covers the entire string, from right to left, so we are +no better off. However, if the pattern is written as + +@example +^(?>.*)(?<=abcd) +@end example + +there can be no backtracking for the .* item; it can match +only the entire string. The subsequent lookbehind assertion +does a single test on the last four characters. If it fails, +the match fails immediately. For long strings, this approach +makes a significant difference to the processing time. + +When a pattern contains an unlimited repeat inside a subpattern +that can itself be repeated an unlimited number of +times, the use of a once-only subpattern is the only way to +avoid some failing matches taking a very long time +indeed.@footnote{Actually, the matcher embedded in @value{SSED} +tries to do something for this in the simplest cases, +like @code{([^b]*b)*}. These cases are actually quite +common: they happen for example in a regular expression +like @code{\/\*([^*]*\*)*\/} which matches C comments.} + +The pattern + +@example +(\D+|<\d+>)*[!?] +@end example + +([^0-9<]+<(\d+>)?)*[!?] + +@noindent +matches an unlimited number of substrings that either consist +of non-digits, or digits enclosed in angular brackets, followed by +an exclamation or question mark. When it matches, it runs quickly. +However, if it is applied to + +@example +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +@end example + +@noindent +it takes a long time before reporting failure. 
This is +because the string can be divided between the two repeats in +a large number of ways, and all have to be tried.@footnote{The +example used @code{[!?]} rather than a single character at the end, +because both @value{SSED} and Perl have an optimization that allows +for fast failure when a single character is used. They +remember the last single character that is required for a +match, and fail early if it is not present in the string.} + +If the pattern is changed to + +@example +((?>\D+)|<\d+>)*[!?] +@end example + +sequences of non-digits cannot be broken, and failure happens +quickly. + +@node Conditional subpatterns +@appendixsec Conditional subpatterns +@cindex Perl-style regular expressions, conditional subpatterns + +It is possible to cause the matching process to obey a subpattern +conditionally or to choose between two alternative +subpatterns, depending on the result of an assertion, or +whether a previous capturing subpattern matched or not. The +two possible forms of conditional subpattern are + +@example +(?(@var{condition})@var{yes-pattern}) +(?(@var{condition})@var{yes-pattern}|@var{no-pattern}) +@end example + +If the condition is satisfied, the yes-pattern is used; otherwise +the no-pattern (if present) is used. If there are more than two +alternatives in the subpattern, a compile-time error occurs. + +There are two kinds of condition. If the text between the +parentheses consists of a sequence of digits, the condition +is satisfied if the capturing subpattern of that number has +previously matched. The number must be greater than zero. +Consider the following pattern, which contains non-significant +white space to make it more readable (assume the @code{X} modifier) +and to divide it into three parts for ease of discussion: + +@example +( \( )? [^()]+ (?(1) \) ) +@end example + +The first part matches an optional opening parenthesis, and +if that character is present, sets it as the first captured +substring. 
The second part matches one or more characters +that are not parentheses. The third part is a conditional +subpattern that tests whether the first set of parentheses +matched or not. If they did, that is, if the subject started +with an opening parenthesis, the condition is true, and so +the yes-pattern is executed and a closing parenthesis is +required. Otherwise, since no-pattern is not present, the +subpattern matches nothing. In other words, this pattern +matches a sequence of non-parentheses, optionally enclosed +in parentheses. + +@cindex Perl-style regular expressions, lookahead subpatterns +If the condition is not a sequence of digits, it must be an +assertion. This may be a positive or negative lookahead or +lookbehind assertion. Consider this pattern, again containing +non-significant white space, and with the two alternatives +on the second and third lines: + +@example +(?(?=...[a-z]) + \d\d-[a-z]@{3@}-\d\d | + \d\d-\d\d-\d\d ) +@end example + +The condition is a positive lookahead assertion that matches +a letter that is three characters away from the current point. +If a letter is found, the subject is matched against the first +alternative @samp{@var{dd}-@var{aaa}-@var{dd}} (where @var{aaa} are +letters and @var{dd} are digits); otherwise it is matched against +the second alternative, @samp{@var{dd}-@var{dd}-@var{dd}}. + + +@node Recursive patterns +@appendixsec Recursive patterns +@cindex Perl-style regular expressions, recursive patterns +@cindex Perl-style regular expressions, recursion + +Consider the problem of matching a string in parentheses, +allowing for unlimited nested parentheses. Without the use +of recursion, the best that can be done is to use a pattern +that matches up to some fixed depth of nesting. It is not +possible to handle an arbitrary nesting depth. Perl 5.6 has +provided an experimental facility that allows regular +expressions to recurse (amongst other things).
It does this +by interpolating Perl code in the expression at run time, +and the code can refer to the expression itself. A Perl pattern +to solve the parentheses problem can be created like +this: + +@example +$re = qr@{\( (?: (?>[^()]+) | (?p@{$re@}) )* \)@}x; +@end example + +The @code{(?p@{...@})} item interpolates Perl code at run time, +and in this case refers recursively to the pattern in which it +appears. Obviously, @command{sed} cannot support the interpolation of +Perl code. Instead, the special item @code{(?R)} is provided for +the specific case of recursion. This pattern solves the +parentheses problem (assume the @code{X} modifier option is used +so that white space is ignored): + +@example +\( ( (?>[^()]+) | (?R) )* \) +@end example + +First it matches an opening parenthesis. Then it matches any +number of substrings which can either be a sequence of +non-parentheses, or a recursive match of the pattern itself +(i.e. a correctly parenthesized substring). Finally there is +a closing parenthesis. + +This particular example pattern contains nested unlimited +repeats, and so the use of a non-backtracking subpattern for +matching strings of non-parentheses is important when applying +the pattern to strings that do not match. For example, when +it is applied to + +@example +(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa() +@end example + +it yields a ``no match'' response quickly. However, if a +standard backtracking subpattern is used instead, the match runs +for a very long time indeed because there are so many different +ways the @code{+} and @code{*} repeats can carve up the subject, +and all have to be tested before failure can be reported. + +The values set for any capturing subpatterns are those from +the outermost level of the recursion at which the subpattern +value is set.
If the pattern above is matched against + +@example +(ab(cd)ef) +@end example + +@noindent +the value for the capturing parentheses is @samp{ef}, which is +the last value taken on at the top level. + +@node Comments +@appendixsec Comments +@cindex Perl-style regular expressions, comments + +The sequence (?# marks the start of a comment which continues +up to the next closing parenthesis. Nested parentheses +are not permitted. The characters that make up a comment +play no part in the pattern matching at all. + +@cindex Perl-style regular expressions, extended +If the @code{X} modifier option is used, an unescaped @code{#} character +outside a character class introduces a comment that continues +up to the next newline character in the pattern. +@end ifset + + +@page +@node Concept Index +@unnumbered Concept Index + +This is a general index of all issues discussed in this manual, with the +exception of the @command{sed} commands and command-line options. + +@printindex cp + +@page +@node Command and Option Index +@unnumbered Command and Option Index + +This is an alphabetical list of all @command{sed} commands and command-line +options. + +@printindex fn + +@contents +@bye + +@c XXX FIXME: the term "cycle" is never defined... diff --git a/doc/sed.1 b/doc/sed.1 new file mode 100644 index 0000000..440287a --- /dev/null +++ b/doc/sed.1 @@ -0,0 +1,411 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.28. +.TH SED "1" "December 2012" "sed 4.2.2" "User Commands" +.SH NAME +sed \- stream editor for filtering and transforming text +.SH SYNOPSIS +.B sed +[\fIOPTION\fR]... \fI{script-only-if-no-other-script} \fR[\fIinput-file\fR]... +.SH DESCRIPTION +.ds sd \fIsed\fP +.ds Sd \fISed\fP +\*(Sd is a stream editor. +A stream editor is used to perform basic text +transformations on an input stream +(a file or input from a pipeline).
+While in some ways similar to an editor which +permits scripted edits (such as \fIed\fP), +\*(sd works by making only one pass over the +input(s), and is consequently more efficient. +But it is \*(sd's ability to filter text in a pipeline +which particularly distinguishes it from other types of +editors. +.HP +\fB\-n\fR, \fB\-\-quiet\fR, \fB\-\-silent\fR +.IP +suppress automatic printing of pattern space +.HP +\fB\-e\fR script, \fB\-\-expression\fR=\fIscript\fR +.IP +add the script to the commands to be executed +.HP +\fB\-f\fR script-file, \fB\-\-file\fR=\fIscript\-file\fR +.IP +add the contents of script-file to the commands to be executed +.HP +\fB\-\-follow\-symlinks\fR +.IP +follow symlinks when processing in place +.HP +\fB\-i[SUFFIX]\fR, \fB\-\-in\-place\fR[=\fISUFFIX\fR] +.IP +edit files in place (makes backup if SUFFIX supplied) +.HP +\fB\-l\fR N, \fB\-\-line\-length\fR=\fIN\fR +.IP +specify the desired line-wrap length for the `l' command +.HP +\fB\-\-posix\fR +.IP +disable all GNU extensions. +.HP +\fB\-r\fR, \fB\-\-regexp\-extended\fR +.IP +use extended regular expressions in the script. +.HP +\fB\-s\fR, \fB\-\-separate\fR +.IP +consider files as separate rather than as a single continuous +long stream. +.HP +\fB\-u\fR, \fB\-\-unbuffered\fR +.IP +load minimal amounts of data from the input files and flush +the output buffers more often +.HP +\fB\-z\fR, \fB\-\-null\-data\fR +.IP +separate lines by NUL characters +.TP +\fB\-\-help\fR +display this help and exit +.TP +\fB\-\-version\fR +output version information and exit +.PP +If no \fB\-e\fR, \fB\-\-expression\fR, \fB\-f\fR, or \fB\-\-file\fR option is given, then the first +non-option argument is taken as the sed script to interpret. All +remaining arguments are names of input files; if no input files are +specified, then the standard input is read. +.PP +GNU sed home page: <http://www.gnu.org/software/sed/>. +General help using GNU software: <http://www.gnu.org/gethelp/>. 
+E-mail bug reports to: <bug-sed@gnu.org>. +Be sure to include the word ``sed'' somewhere in the ``Subject:'' field. +.SH "COMMAND SYNOPSIS" +This is just a brief synopsis of \*(sd commands to serve as +a reminder to those who already know \*(sd; +other documentation (such as the texinfo document) +must be consulted for fuller descriptions. +.SS +Zero-address ``commands'' +.TP +.RI :\ label +Label for +.B b +and +.B t +commands. +.TP +.RI # comment +The comment extends until the next newline (or the end of a +.B -e +script fragment). +.TP +} +The closing bracket of a { } block. +.SS +Zero- or One- address commands +.TP += +Print the current line number. +.TP +a \e +.TP +.I text +Append +.IR text , +which has each embedded newline preceded by a backslash. +.TP +i \e +.TP +.I text +Insert +.IR text , +which has each embedded newline preceded by a backslash. +.TP +q [\fIexit-code\fR] +Immediately quit the \*(sd script without processing +any more input, except that if auto-print is not disabled +the current pattern space will be printed. The exit code +argument is a GNU extension. +.TP +Q [\fIexit-code\fR] +Immediately quit the \*(sd script without processing +any more input. This is a GNU extension. +.TP +.RI r\ filename +Append text read from +.IR filename . +.TP +.RI R\ filename +Append a line read from +.IR filename . +Each invocation of the command reads a line from the file. +This is a GNU extension. +.SS +Commands which accept address ranges +.TP +{ +Begin a block of commands (end with a }). +.TP +.RI b\ label +Branch to +.IR label ; +if +.I label +is omitted, branch to end of script. +.TP +c \e +.TP +.I text +Replace the selected lines with +.IR text , +which has each embedded newline preceded by a backslash. +.TP +d +Delete pattern space. +Start next cycle. +.TP +D +If pattern space contains no newline, start a normal new cycle as if +the d command was issued. 
Otherwise, delete text in the pattern +space up to the first newline, and restart cycle with the resultant +pattern space, without reading a new line of input. +.TP +h H +Copy/append pattern space to hold space. +.TP +g G +Copy/append hold space to pattern space. +.TP +l +List out the current line in a ``visually unambiguous'' form. +.TP +.RI l\ width +List out the current line in a ``visually unambiguous'' form, +breaking it at +.I width +characters. This is a GNU extension. +.TP +n N +Read/append the next line of input into the pattern space. +.TP +p +Print the current pattern space. +.TP +P +Print up to the first embedded newline of the current pattern space. +.TP +.RI s/ regexp / replacement / +Attempt to match +.I regexp +against the pattern space. +If successful, replace that portion matched +with +.IR replacement . +The +.I replacement +may contain the special character +.B & +to refer to that portion of the pattern space which matched, +and the special escapes \e1 through \e9 to refer to the +corresponding matching sub-expressions in the +.IR regexp . +.TP +.RI t\ label +If a s/// has done a successful substitution since the +last input line was read and since the last t or T +command, then branch to +.IR label ; +if +.I label +is omitted, branch to end of script. +.TP +.RI T\ label +If no s/// has done a successful substitution since the +last input line was read and since the last t or T +command, then branch to +.IR label ; +if +.I label +is omitted, branch to end of script. This is a GNU +extension. +.TP +.RI w\ filename +Write the current pattern space to +.IR filename . +.TP +.RI W\ filename +Write the first line of the current pattern space to +.IR filename . +This is a GNU extension. +.TP +x +Exchange the contents of the hold and pattern spaces. +.TP +.RI y/ source / dest / +Transliterate the characters in the pattern space which appear in +.I source +to the corresponding character in +.IR dest . 
+.SH +Addresses +\*(Sd commands can be given with no addresses, in which +case the command will be executed for all input lines; +with one address, in which case the command will only be executed +for input lines which match that address; or with two +addresses, in which case the command will be executed +for all input lines which match the inclusive range of +lines starting from the first address and continuing to +the second address. +Three things to note about address ranges: +the syntax is +.IR addr1 , addr2 +(i.e., the addresses are separated by a comma); +the line which +.I addr1 +matched will always be accepted, +even if +.I addr2 +selects an earlier line; +and if +.I addr2 +is a +.IR regexp , +it will not be tested against the line that +.I addr1 +matched. +.PP +After the address (or address-range), +and before the command, a +.B ! +may be inserted, +which specifies that the command shall only be +executed if the address (or address-range) does +.B not +match. +.PP +The following address types are supported: +.TP +.I number +Match only the specified line +.IR number +(which increments cumulatively across files, unless the +.B -s +option is specified on the command line). +.TP +.IR first ~ step +Match every +.IR step 'th +line starting with line +.IR first . +For example, ``sed -n 1~2p'' will print all the odd-numbered lines in +the input stream, and the address 2~5 will match every fifth line, +starting with the second. +.I first +can be zero; in this case, \*(sd operates as if it were equal to +.IR step . +(This is an extension.) +.TP +$ +Match the last line. +.TP +.RI / regexp / +Match lines matching the regular expression +.IR regexp . +.TP +.BI \fR\e\fPc regexp c +Match lines matching the regular expression +.IR regexp . +The +.B c +may be any character. +.PP +GNU \*(sd also supports some special 2-address forms: +.TP +.RI 0, addr2 +Start out in "matched first address" state, until +.I addr2 +is found. 
+This is similar to +.RI 1, addr2 , +except that if +.I addr2 +matches the very first line of input the +.RI 0, addr2 +form will be at the end of its range, whereas the +.RI 1, addr2 +form will still be at the beginning of its range. +This works only when +.I addr2 +is a regular expression. +.TP +.IR addr1 ,+ N +Will match +.I addr1 +and the +.I N +lines following +.IR addr1 . +.TP +.IR addr1 ,~ N +Will match +.I addr1 +and the lines following +.I addr1 +until the next line whose input line number is a multiple of +.IR N . +.SH "REGULAR EXPRESSIONS" +POSIX.2 BREs +.I should +be supported, but they aren't completely because of performance +problems. +The +.B \en +sequence in a regular expression matches the newline character, +and similarly for +.BR \ea , +.BR \et , +and other sequences. +.SH BUGS +.PP +E-mail bug reports to +.BR bug-sed@gnu.org . +Also, please include the output of ``sed --version'' in the body +of your report if at all possible. +.SH AUTHOR +Written by Jay Fenlason, Tom Lord, Ken Pizzini, +and Paolo Bonzini. +GNU sed home page: <http://www.gnu.org/software/sed/>. +General help using GNU software: <http://www.gnu.org/gethelp/>. +E-mail bug reports to: <bug-sed@gnu.org>. +Be sure to include the word ``sed'' somewhere in the ``Subject:'' field. +.SH COPYRIGHT +Copyright \(co 2012 Free Software Foundation, Inc. +License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>. +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. +.SH "SEE ALSO" +.BR awk (1), +.BR ed (1), +.BR grep (1), +.BR tr (1), +.BR perlre (1), +sed.info, +any of various books on \*(sd, +.na +the \*(sd FAQ (http://sed.sf.net/grabbag/tutorials/sedfaq.txt), +http://sed.sf.net/grabbag/. +.PP +The full documentation for +.B sed +is maintained as a Texinfo manual. 
If the +.B info +and +.B sed +programs are properly installed at your site, the command +.IP +.B info sed +.PP +should give you access to the complete manual. diff --git a/doc/sed.info b/doc/sed.info new file mode 100644 index 0000000..38235da --- /dev/null +++ b/doc/sed.info @@ -0,0 +1,2612 @@ +This is ../../doc/sed.info, produced by makeinfo version 4.13 from +../../doc//config.texi. + +INFO-DIR-SECTION Text creation and manipulation +START-INFO-DIR-ENTRY +* sed: (sed). Stream EDitor. + +END-INFO-DIR-ENTRY + + This file documents version 4.2.2 of GNU `sed', a stream editor. + + Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004 Free Software +Foundation, Inc. + + This document is released under the terms of the GNU Free +Documentation License as published by the Free Software Foundation; +either version 1.1, or (at your option) any later version. + + You should have received a copy of the GNU Free Documentation +License along with GNU `sed'; see the file `COPYING.DOC'. If not, +write to the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02110-1301, USA. + + There are no Cover Texts and no Invariant Sections; this text, along +with its equivalent in the printed manual, constitutes the Title Page. + + +File: sed.info, Node: Top, Next: Introduction, Up: (dir) + +sed, a stream editor +******************** + +This file documents version 4.2.2 of GNU `sed', a stream editor. + + Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004 Free Software +Foundation, Inc. + + This document is released under the terms of the GNU Free +Documentation License as published by the Free Software Foundation; +either version 1.1, or (at your option) any later version. + + You should have received a copy of the GNU Free Documentation +License along with GNU `sed'; see the file `COPYING.DOC'. If not, +write to the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02110-1301, USA. 
+ + There are no Cover Texts and no Invariant Sections; this text, along +with its equivalent in the printed manual, constitutes the Title Page. + +* Menu: + +* Introduction:: Introduction +* Invoking sed:: Invocation +* sed Programs:: `sed' programs +* Examples:: Some sample scripts +* Limitations:: Limitations and (non-)limitations of GNU `sed' +* Other Resources:: Other resources for learning about `sed' +* Reporting Bugs:: Reporting bugs + +* Extended regexps:: `egrep'-style regular expressions + +* Concept Index:: A menu with all the topics in this manual. +* Command and Option Index:: A menu with all `sed' commands and + command-line options. + +--- The detailed node listing --- + +sed Programs: +* Execution Cycle:: How `sed' works +* Addresses:: Selecting lines with `sed' +* Regular Expressions:: Overview of regular expression syntax +* Common Commands:: Often used commands +* The "s" Command:: `sed''s Swiss Army Knife +* Other Commands:: Less frequently used commands +* Programming Commands:: Commands for `sed' gurus +* Extended Commands:: Commands specific of GNU `sed' +* Escapes:: Specifying special characters + +Examples: +* Centering lines:: +* Increment a number:: +* Rename files to lower case:: +* Print bash environment:: +* Reverse chars of lines:: +* tac:: Reverse lines of files +* cat -n:: Numbering lines +* cat -b:: Numbering non-blank lines +* wc -c:: Counting chars +* wc -w:: Counting words +* wc -l:: Counting lines +* head:: Printing the first lines +* tail:: Printing the last lines +* uniq:: Make duplicate lines unique +* uniq -d:: Print duplicated lines of input +* uniq -u:: Remove all duplicated lines +* cat -s:: Squeezing blank lines + + +File: sed.info, Node: Introduction, Next: Invoking sed, Prev: Top, Up: Top + +1 Introduction +************** + +`sed' is a stream editor. A stream editor is used to perform basic text +transformations on an input stream (a file or input from a pipeline). 
+While in some ways similar to an editor which permits scripted edits +(such as `ed'), `sed' works by making only one pass over the input(s), +and is consequently more efficient. But it is `sed''s ability to +filter text in a pipeline which particularly distinguishes it from +other types of editors. + + +File: sed.info, Node: Invoking sed, Next: sed Programs, Prev: Introduction, Up: Top + +2 Invocation +************ + +Normally `sed' is invoked like this: + + sed SCRIPT INPUTFILE... + + The full format for invoking `sed' is: + + sed OPTIONS... [SCRIPT] [INPUTFILE...] + + If you do not specify INPUTFILE, or if INPUTFILE is `-', `sed' +filters the contents of the standard input. The SCRIPT is actually the +first non-option parameter, which `sed' specially considers a script +and not an input file if (and only if) none of the other OPTIONS +specifies a script to be executed, that is if neither of the `-e' and +`-f' options is specified. + + `sed' may be invoked with the following command-line options: + +`--version' + Print out the version of `sed' that is being run and a copyright + notice, then exit. + +`--help' + Print a usage message briefly summarizing these command-line + options and the bug-reporting address, then exit. + +`-n' +`--quiet' +`--silent' + By default, `sed' prints out the pattern space at the end of each + cycle through the script (*note How `sed' works: Execution Cycle.). + These options disable this automatic printing, and `sed' only + produces output when explicitly told to via the `p' command. + +`-e SCRIPT' +`--expression=SCRIPT' + Add the commands in SCRIPT to the set of commands to be run while + processing the input. + +`-f SCRIPT-FILE' +`--file=SCRIPT-FILE' + Add the commands contained in the file SCRIPT-FILE to the set of + commands to be run while processing the input. + +`-i[SUFFIX]' +`--in-place[=SUFFIX]' + This option specifies that files are to be edited in-place. 
GNU + `sed' does this by creating a temporary file and sending output to + this file rather than to the standard output.(1) + + This option implies `-s'. + + When the end of the file is reached, the temporary file is renamed + to the output file's original name. The extension, if supplied, + is used to modify the name of the old file before renaming the + temporary file, thereby making a backup copy.(2) + + This rule is followed: if the extension doesn't contain a `*', + then it is appended to the end of the current filename as a + suffix; if the extension does contain one or more `*' characters, + then _each_ asterisk is replaced with the current filename. This + allows you to add a prefix to the backup file, instead of (or in + addition to) a suffix, or even to place backup copies of the + original files into another directory (provided the directory + already exists). + + If no extension is supplied, the original file is overwritten + without making a backup. + +`-l N' +`--line-length=N' + Specify the default line-wrap length for the `l' command. A + length of 0 (zero) means to never wrap long lines. If not + specified, it is taken to be 70. + +`--posix' + GNU `sed' includes several extensions to POSIX sed. In order to + simplify writing portable scripts, this option disables all the + extensions that this manual documents, including additional + commands. Most of the extensions accept `sed' programs that are + outside the syntax mandated by POSIX, but some of them (such as + the behavior of the `N' command described in *note Reporting + Bugs::) actually violate the standard. If you want to disable + only the latter kind of extension, you can set the + `POSIXLY_CORRECT' variable to a non-empty value. + +`-b' +`--binary' + This option is available on every platform, but is only effective + where the operating system makes a distinction between text files + and binary files.
When such a distinction is made--as is the case + for MS-DOS, Windows, Cygwin--text files are composed of lines + separated by a carriage return _and_ a line feed character, and + `sed' does not see the ending CR. When this option is specified, + `sed' will open input files in binary mode, thus not requesting + this special processing and considering lines to end at a line + feed. + +`--follow-symlinks' + This option is available only on platforms that support symbolic + links and has an effect only if option `-i' is specified. In this + case, if the file that is specified on the command line is a + symbolic link, `sed' will follow the link and edit the ultimate + destination of the link. The default behavior is to break the + symbolic link, so that the link destination will not be modified. + +`-r' +`--regexp-extended' + Use extended regular expressions rather than basic regular + expressions. Extended regexps are those that `egrep' accepts; + they can be clearer because they usually have less backslashes, + but are a GNU extension and hence scripts that use them are not + portable. *Note Extended regular expressions: Extended regexps. + +`-s' +`--separate' + By default, `sed' will consider the files specified on the command + line as a single continuous long stream. This GNU `sed' extension + allows the user to consider them as separate files: range + addresses (such as `/abc/,/def/') are not allowed to span several + files, line numbers are relative to the start of each file, `$' + refers to the last line of each file, and files invoked from the + `R' commands are rewound at the start of each file. + +`-u' +`--unbuffered' + Buffer both input and output as minimally as practical. (This is + particularly useful if the input is coming from the likes of `tail + -f', and you wish to see the transformed output as soon as + possible.) 
+ +`-z' +`--null-data' +`--zero-terminated' + Treat the input as a set of lines, each terminated by a zero byte + (the ASCII `NUL' character) instead of a newline. This option can + be used with commands like `sort -z' and `find -print0' to process + arbitrary file names. + + If no `-e', `-f', `--expression', or `--file' options are given on +the command-line, then the first non-option argument on the command +line is taken to be the SCRIPT to be executed. + + If any command-line parameters remain after processing the above, +these parameters are interpreted as the names of input files to be +processed. A file name of `-' refers to the standard input stream. +The standard input will be processed if no file names are specified. + + ---------- Footnotes ---------- + + (1) This applies to commands such as `=', `a', `c', `i', `l', `p'. +You can still write to the standard output by using the `w' or `W' +commands together with the `/dev/stdout' special file + + (2) Note that GNU `sed' creates the backup file whether or not any +output is actually changed. + + +File: sed.info, Node: sed Programs, Next: Examples, Prev: Invoking sed, Up: Top + +3 `sed' Programs +**************** + +A `sed' program consists of one or more `sed' commands, passed in by +one or more of the `-e', `-f', `--expression', and `--file' options, or +the first non-option argument if zero of these options are used. This +document will refer to "the" `sed' script; this is understood to mean +the in-order catenation of all of the SCRIPTs and SCRIPT-FILEs passed +in. + + Commands within a SCRIPT or SCRIPT-FILE can be separated by +semicolons (`;') or newlines (ASCII 10). Some commands, due to their +syntax, cannot be followed by semicolons working as command separators +and thus should be terminated with newlines or be placed at the end of +a SCRIPT or SCRIPT-FILE. Commands can also be preceded with optional +non-significant whitespace characters. 
+ + Each `sed' command consists of an optional address or address range, +followed by a one-character command name and any additional +command-specific code. + +* Menu: + +* Execution Cycle:: How `sed' works +* Addresses:: Selecting lines with `sed' +* Regular Expressions:: Overview of regular expression syntax +* Common Commands:: Often used commands +* The "s" Command:: `sed''s Swiss Army Knife +* Other Commands:: Less frequently used commands +* Programming Commands:: Commands for `sed' gurus +* Extended Commands:: Commands specific of GNU `sed' +* Escapes:: Specifying special characters + + +File: sed.info, Node: Execution Cycle, Next: Addresses, Up: sed Programs + +3.1 How `sed' Works +=================== + +`sed' maintains two data buffers: the active _pattern_ space, and the +auxiliary _hold_ space. Both are initially empty. + + `sed' operates by performing the following cycle on each line of +input: first, `sed' reads one line from the input stream, removes any +trailing newline, and places it in the pattern space. Then commands +are executed; each command can have an address associated to it: +addresses are a kind of condition code, and a command is only executed +if the condition is verified before the command is to be executed. + + When the end of the script is reached, unless the `-n' option is in +use, the contents of pattern space are printed out to the output +stream, adding back the trailing newline if it was removed.(1) Then the +next cycle starts for the next input line. + + Unless special commands (like `D') are used, the pattern space is +deleted between two cycles. The hold space, on the other hand, keeps +its data between cycles (see commands `h', `H', `x', `g', `G' to move +data between both buffers). 
+ + ---------- Footnotes ---------- + + (1) Actually, if `sed' prints a line without the terminating +newline, it will nevertheless print the missing newline as soon as more +text is sent to the same output stream, which gives the "least expected +surprise" even though it does not make commands like `sed -n p' exactly +identical to `cat'. + + +File: sed.info, Node: Addresses, Next: Regular Expressions, Prev: Execution Cycle, Up: sed Programs + +3.2 Selecting lines with `sed' +============================== + +Addresses in a `sed' script can be in any of the following forms: +`NUMBER' + Specifying a line number will match only that line in the input. + (Note that `sed' counts lines continuously across all input files + unless `-i' or `-s' options are specified.) + +`FIRST~STEP' + This GNU extension matches every STEPth line starting with line + FIRST. In particular, lines will be selected when there exists a + non-negative N such that the current line-number equals FIRST + (N + * STEP). Thus, to select the odd-numbered lines, one would use + `1~2'; to pick every third line starting with the second, `2~3' + would be used; to pick every fifth line starting with the tenth, + use `10~5'; and `50~0' is just an obscure way of saying `50'. + +`$' + This address matches the last line of the last file of input, or + the last line of each file when the `-i' or `-s' options are + specified. + +`/REGEXP/' + This will select any line which matches the regular expression + REGEXP. If REGEXP itself includes any `/' characters, each must + be escaped by a backslash (`\'). + + The empty regular expression `//' repeats the last regular + expression match (the same holds if the empty regular expression is + passed to the `s' command). Note that modifiers to regular + expressions are evaluated when the regular expression is compiled, + thus it is invalid to specify them together with the empty regular + expression. 
+ +`\%REGEXP%' + (The `%' may be replaced by any other single character.) + + This also matches the regular expression REGEXP, but allows one to + use a different delimiter than `/'. This is particularly useful + if the REGEXP itself contains a lot of slashes, since it avoids + the tedious escaping of every `/'. If REGEXP itself includes any + delimiter characters, each must be escaped by a backslash (`\'). + +`/REGEXP/I' +`\%REGEXP%I' + The `I' modifier to regular-expression matching is a GNU extension + which causes the REGEXP to be matched in a case-insensitive manner. + +`/REGEXP/M' +`\%REGEXP%M' + The `M' modifier to regular-expression matching is a GNU `sed' + extension which directs GNU `sed' to match the regular expression + in `multi-line' mode. The modifier causes `^' and `$' to match + respectively (in addition to the normal behavior) the empty string + after a newline, and the empty string before a newline. There are + special character sequences (`\`' and `\'') which always match the + beginning or the end of the buffer. In addition, the period + character does not match a new-line character in multi-line mode. + + + If no addresses are given, then all lines are matched; if one +address is given, then only lines matching that address are matched. + + An address range can be specified by specifying two addresses +separated by a comma (`,'). An address range matches lines starting +from where the first address matches, and continues until the second +address matches (inclusively). + + If the second address is a REGEXP, then checking for the ending +match will start with the line _following_ the line which matched the +first address: a range will always span at least two lines (except of +course if the input stream ends). + + If the second address is a NUMBER less than (or equal to) the line +matching the first address, then only the one line is matched. 
+ + GNU `sed' also supports some special two-address forms; all these +are GNU extensions: +`0,/REGEXP/' + A line number of `0' can be used in an address specification like + `0,/REGEXP/' so that `sed' will try to match REGEXP in the first + input line too. In other words, `0,/REGEXP/' is similar to + `1,/REGEXP/', except that if ADDR2 matches the very first line of + input the `0,/REGEXP/' form will consider it to end the range, + whereas the `1,/REGEXP/' form will match the beginning of its + range and hence make the range span up to the _second_ occurrence + of the regular expression. + + Note that this is the only place where the `0' address makes + sense; there is no 0-th line and commands which are given the `0' + address in any other way will give an error. + +`ADDR1,+N' + Matches ADDR1 and the N lines following ADDR1. + +`ADDR1,~N' + Matches ADDR1 and the lines following ADDR1 until the next line + whose input line number is a multiple of N. + + Appending the `!' character to the end of an address specification +negates the sense of the match. That is, if the `!' character follows +an address range, then only lines which do _not_ match the address range +will be selected. This also works for singleton addresses, and, +perhaps perversely, for the null address. + + +File: sed.info, Node: Regular Expressions, Next: Common Commands, Prev: Addresses, Up: sed Programs + +3.3 Overview of Regular Expression Syntax +========================================= + +To know how to use `sed', people should understand regular expressions +("regexp" for short). A regular expression is a pattern that is +matched against a subject string from left to right. Most characters +are "ordinary": they stand for themselves in a pattern, and match the +corresponding characters in the subject. As a trivial example, the +pattern + + The quick brown fox + +matches a portion of a subject string that is identical to itself. 
The +power of regular expressions comes from the ability to include +alternatives and repetitions in the pattern. These are encoded in the +pattern by the use of "special characters", which do not stand for +themselves but instead are interpreted in some special way. Here is a +brief description of regular expression syntax as used in `sed'. + +`CHAR' + A single ordinary character matches itself. + +`*' + Matches a sequence of zero or more instances of matches for the + preceding regular expression, which must be an ordinary character, + a special character preceded by `\', a `.', a grouped regexp (see + below), or a bracket expression. As a GNU extension, a postfixed + regular expression can also be followed by `*'; for example, `a**' + is equivalent to `a*'. POSIX 1003.1-2001 says that `*' stands for + itself when it appears at the start of a regular expression or + subexpression, but many non-GNU implementations do not support this + and portable scripts should instead use `\*' in these contexts. + +`\+' + As `*', but matches one or more. It is a GNU extension. + +`\?' + As `*', but only matches zero or one. It is a GNU extension. + +`\{I\}' + As `*', but matches exactly I sequences (I is a decimal integer; + for portability, keep it between 0 and 255 inclusive). + +`\{I,J\}' + Matches between I and J, inclusive, sequences. + +`\{I,\}' + Matches more than or equal to I sequences. + +`\(REGEXP\)' + Groups the inner REGEXP as a whole; this is used to: + + * Apply postfix operators, like `\(abcd\)*': this will search + for zero or more whole sequences of `abcd', while `abcd*' + would search for `abc' followed by zero or more occurrences + of `d'. Note that support for `\(abcd\)*' is required by + POSIX 1003.1-2001, but many non-GNU implementations do not + support it and hence it is not universally portable. + + * Use back references (see below). + +`.' + Matches any character, including newline.
+ +`^' + Matches the null string at beginning of the pattern space, i.e. + what appears after the circumflex must appear at the beginning of + the pattern space. + + In most scripts, pattern space is initialized to the content of + each line (*note How `sed' works: Execution Cycle.). So, it is a + useful simplification to think of `^#include' as matching only + lines where `#include' is the first thing on line--if there are + spaces before, for example, the match fails. This simplification + is valid as long as the original content of pattern space is not + modified, for example with an `s' command. + + `^' acts as a special character only at the beginning of the + regular expression or subexpression (that is, after `\(' or `\|'). + Portable scripts should avoid `^' at the beginning of a + subexpression, though, as POSIX allows implementations that treat + `^' as an ordinary character in that context. + +`$' + It is the same as `^', but refers to end of pattern space. `$' + also acts as a special character only at the end of the regular + expression or subexpression (that is, before `\)' or `\|'), and + its use at the end of a subexpression is not portable. + +`[LIST]' +`[^LIST]' + Matches any single character in LIST: for example, `[aeiou]' + matches all vowels. A list may include sequences like + `CHAR1-CHAR2', which matches any character between (inclusive) + CHAR1 and CHAR2. + + A leading `^' reverses the meaning of LIST, so that it matches any + single character _not_ in LIST. To include `]' in the list, make + it the first character (after the `^' if needed), to include `-' + in the list, make it the first or last; to include `^' put it + after the first character. + + The characters `$', `*', `.', `[', and `\' are normally not + special within LIST. For example, `[\*]' matches either `\' or + `*', because the `\' is not special here. 
However, strings like + `[.ch.]', `[=a=]', and `[:space:]' are special within LIST and + represent collating symbols, equivalence classes, and character + classes, respectively, and `[' is therefore special within LIST + when it is followed by `.', `=', or `:'. Also, when not in + `POSIXLY_CORRECT' mode, special escapes like `\n' and `\t' are + recognized within LIST. *Note Escapes::. + +`REGEXP1\|REGEXP2' + Matches either REGEXP1 or REGEXP2. Use parentheses to use complex + alternative regular expressions. The matching process tries each + alternative in turn, from left to right, and the first one that + succeeds is used. It is a GNU extension. + +`REGEXP1REGEXP2' + Matches the concatenation of REGEXP1 and REGEXP2. Concatenation + binds more tightly than `\|', `^', and `$', but less tightly than + the other regular expression operators. + +`\DIGIT' + Matches the DIGIT-th `\(...\)' parenthesized subexpression in the + regular expression. This is called a "back reference". + Subexpressions are implicitly numbered by counting occurrences of + `\(' left-to-right. + +`\n' + Matches the newline character. + +`\CHAR' + Matches CHAR, where CHAR is one of `$', `*', `.', `[', `\', or `^'. + Note that the only C-like backslash sequences that you can + portably assume to be interpreted are `\n' and `\\'; in particular + `\t' is not portable, and matches a `t' under most implementations + of `sed', rather than a tab character. + + + Note that the regular expression matcher is greedy, i.e., matches +are attempted from left to right and, if two or more matches are +possible starting at the same character, it selects the longest. + +Examples: +`abcdef' + Matches `abcdef'. + +`a*b' + Matches zero or more `a's followed by a single `b'. For example, + `b' or `aaaaab'. + +`a\?b' + Matches `b' or `ab'. + +`a\+b\+' + Matches one or more `a's followed by one or more `b's: `ab' is the + shortest possible match, but other examples are `aaaab' or + `abbbbb' or `aaaaaabbbbbbb'.
+ +`.*' +`.\+' + These two both match all the characters in a string; however, the + first matches every string (including the empty string), while the + second matches only strings containing at least one character. + +`^main.*(.*)' + This matches a string starting with `main', followed by an opening + and closing parenthesis. The `n', `(' and `)' need not be + adjacent. + +`^#' + This matches a string beginning with `#'. + +`\\$' + This matches a string ending with a single backslash. The regexp + contains two backslashes for escaping. + +`\$' + Instead, this matches a string consisting of a single dollar sign, + because it is escaped. + +`[a-zA-Z0-9]' + In the C locale, this matches any ASCII letters or digits. + +`[^ tab]\+' + (Here `tab' stands for a single tab character.) This matches a + string of one or more characters, none of which is a space or a + tab. Usually this means a word. + +`^\(.*\)\n\1$' + This matches a string consisting of two equal substrings separated + by a newline. + +`.\{9\}A$' + This matches nine characters followed by an `A'. + +`^.\{15\}A' + This matches the start of a string that contains 16 characters, + the last of which is an `A'. + + + +File: sed.info, Node: Common Commands, Next: The "s" Command, Prev: Regular Expressions, Up: sed Programs + +3.4 Often-Used Commands +======================= + +If you use `sed' at all, you will quite likely want to know these +commands. + +`#' + [No addresses allowed.] + + The `#' character begins a comment; the comment continues until + the next newline. + + If you are concerned about portability, be aware that some + implementations of `sed' (which are not POSIX conformant) may only + support a single one-line comment, and then only when the very + first character of the script is a `#'. + + Warning: if the first two characters of the `sed' script are `#n', + then the `-n' (no-autoprint) option is forced. 
If you want to put + a comment in the first line of your script and that comment begins + with the letter `n' and you do not want this behavior, then be + sure to either use a capital `N', or place at least one space + before the `n'. + +`q [EXIT-CODE]' + This command only accepts a single address. + + Exit `sed' without processing any more commands or input. Note + that the current pattern space is printed if auto-print is not + disabled with the `-n' option. The ability to return an exit + code from the `sed' script is a GNU `sed' extension. + +`d' + Delete the pattern space; immediately start next cycle. + +`p' + Print out the pattern space (to the standard output). This + command is usually only used in conjunction with the `-n' + command-line option. + +`n' + If auto-print is not disabled, print the pattern space, then, + regardless, replace the pattern space with the next line of input. + If there is no more input then `sed' exits without processing any + more commands. + +`{ COMMANDS }' + A group of commands may be enclosed between `{' and `}' characters. + This is particularly useful when you want a group of commands to + be triggered by a single address (or address-range) match. + + + +File: sed.info, Node: The "s" Command, Next: Other Commands, Prev: Common Commands, Up: sed Programs + +3.5 The `s' Command +=================== + +The syntax of the `s' (as in substitute) command is +`s/REGEXP/REPLACEMENT/FLAGS'. The `/' characters may be uniformly +replaced by any other single character within any given `s' command. +The `/' character (or whatever other character is used in its stead) +can appear in the REGEXP or REPLACEMENT only if it is preceded by a `\' +character. + + The `s' command is probably the most important in `sed' and has a +lot of different options.
Its basic concept is simple: the `s' command +attempts to match the pattern space against the supplied REGEXP; if the +match is successful, then that portion of the pattern space which was +matched is replaced with REPLACEMENT. + + The REPLACEMENT can contain `\N' (N being a number from 1 to 9, +inclusive) references, which refer to the portion of the match which is +contained between the Nth `\(' and its matching `\)'. Also, the +REPLACEMENT can contain unescaped `&' characters which reference the +whole matched portion of the pattern space. Finally, as a GNU `sed' +extension, you can include a special sequence made of a backslash and +one of the letters `L', `l', `U', `u', or `E'. The meaning is as +follows: + +`\L' + Turn the replacement to lowercase until a `\U' or `\E' is found, + +`\l' + Turn the next character to lowercase, + +`\U' + Turn the replacement to uppercase until a `\L' or `\E' is found, + +`\u' + Turn the next character to uppercase, + +`\E' + Stop case conversion started by `\L' or `\U'. + + When the `g' flag is being used, case conversion does not propagate +from one occurrence of the regular expression to another. For example, +when the following command is executed with `a-b-' in pattern space: + s/\(b\?\)-/x\u\1/g + +the output is `axxB'. When replacing the first `-', the `\u' sequence +only affects the empty replacement of `\1'. It does not affect the `x' +character that is added to pattern space when replacing `b-' with `xB'. + + On the other hand, `\l' and `\u' do affect the remainder of the +replacement text if they are followed by an empty substitution. With +`a-b-' in pattern space, the following command: + s/\(b\?\)-/\u\1x/g + +will replace `-' with `X' (uppercase) and `b-' with `Bx'. If this +behavior is undesirable, you can prevent it by adding a `\E' +sequence--after `\1' in this case. 
+ + To include a literal `\', `&', or newline in the final replacement, +be sure to precede the desired `\', `&', or newline in the REPLACEMENT +with a `\'. + + The `s' command can be followed by zero or more of the following +FLAGS: + +`g' + Apply the replacement to _all_ matches to the REGEXP, not just the + first. + +`NUMBER' + Only replace the NUMBERth match of the REGEXP. + + Note: the POSIX standard does not specify what should happen when + you mix the `g' and NUMBER modifiers, and currently there is no + widely agreed upon meaning across `sed' implementations. For GNU + `sed', the interaction is defined to be: ignore matches before the + NUMBERth, and then match and replace all matches from the NUMBERth + on. + +`p' + If the substitution was made, then print the new pattern space. + + Note: when both the `p' and `e' options are specified, the + relative ordering of the two produces very different results. In + general, `ep' (evaluate then print) is what you want, but + operating the other way round can be useful for debugging. For + this reason, the current version of GNU `sed' interprets specially + the presence of `p' options both before and after `e', printing + the pattern space before and after evaluation, while in general + flags for the `s' command show their effect just once. This + behavior, although documented, might change in future versions. + +`w FILE-NAME' + If the substitution was made, then write out the result to the + named file. As a GNU `sed' extension, two special values of + FILE-NAME are supported: `/dev/stderr', which writes the result to + the standard error, and `/dev/stdout', which writes to the standard + output.(1) + +`e' + This command allows one to pipe input from a shell command into + pattern space. If a substitution was made, the command that is + found in pattern space is executed and pattern space is replaced + with its output. 
A trailing newline is suppressed; results are + undefined if the command to be executed contains a NUL character. + This is a GNU `sed' extension. + +`I' +`i' + The `I' modifier to regular-expression matching is a GNU extension + which makes `sed' match REGEXP in a case-insensitive manner. + +`M' +`m' + The `M' modifier to regular-expression matching is a GNU `sed' + extension which directs GNU `sed' to match the regular expression + in `multi-line' mode. The modifier causes `^' and `$' to match + respectively (in addition to the normal behavior) the empty string + after a newline, and the empty string before a newline. There are + special character sequences (`\`' and `\'') which always match the + beginning or the end of the buffer. In addition, the period + character does not match a new-line character in multi-line mode. + + + ---------- Footnotes ---------- + + (1) This is equivalent to `p' unless the `-i' option is being used. + + +File: sed.info, Node: Other Commands, Next: Programming Commands, Prev: The "s" Command, Up: sed Programs + +3.6 Less Frequently-Used Commands +================================= + +Though perhaps less frequently used than those in the previous section, +some very small yet useful `sed' scripts can be built with these +commands. + +`y/SOURCE-CHARS/DEST-CHARS/' + (The `/' characters may be uniformly replaced by any other single + character within any given `y' command.) + + Transliterate any characters in the pattern space which match any + of the SOURCE-CHARS with the corresponding character in DEST-CHARS. + + Instances of the `/' (or whatever other character is used in its + stead), `\', or newlines can appear in the SOURCE-CHARS or + DEST-CHARS lists, provided that each instance is escaped by a `\'. + The SOURCE-CHARS and DEST-CHARS lists _must_ contain the same + number of characters (after de-escaping). + +`a\' +`TEXT' + As a GNU extension, this command accepts two addresses.
+ + Queue the lines of text which follow this command (each but the + last ending with a `\', which are removed from the output) to be + output at the end of the current cycle, or when the next input + line is read. + + Escape sequences in TEXT are processed, so you should use `\\' in + TEXT to print a single backslash. + + As a GNU extension, if between the `a' and the newline there is + other than a whitespace-`\' sequence, then the text of this line, + starting at the first non-whitespace character after the `a', is + taken as the first line of the TEXT block. (This enables a + simplification in scripting a one-line add.) This extension also + works with the `i' and `c' commands. + +`i\' +`TEXT' + As a GNU extension, this command accepts two addresses. + + Immediately output the lines of text which follow this command + (each but the last ending with a `\', which are removed from the + output). + +`c\' +`TEXT' + Delete the lines matching the address or address-range, and output + the lines of text which follow this command (each but the last + ending with a `\', which are removed from the output) in place of + the last line (or in place of each line, if no addresses were + specified). A new cycle is started after this command is done, + since the pattern space will have been deleted. + +`=' + As a GNU extension, this command accepts two addresses. + + Print out the current input line number (with a trailing newline). + +`l N' + Print the pattern space in an unambiguous form: non-printable + characters (and the `\' character) are printed in C-style escaped + form; long lines are split, with a trailing `\' character to + indicate the split; the end of each line is marked with a `$'. + + N specifies the desired line-wrap length; a length of 0 (zero) + means to never wrap long lines. If omitted, the default as + specified on the command line is used. The N parameter is a GNU + `sed' extension. + +`r FILENAME' + As a GNU extension, this command accepts two addresses. 
+ + Queue the contents of FILENAME to be read and inserted into the + output stream at the end of the current cycle, or when the next + input line is read. Note that if FILENAME cannot be read, it is + treated as if it were an empty file, without any error indication. + + As a GNU `sed' extension, the special value `/dev/stdin' is + supported for the file name, which reads the contents of the + standard input. + +`w FILENAME' + Write the pattern space to FILENAME. As a GNU `sed' extension, + two special values of FILE-NAME are supported: `/dev/stderr', + which writes the result to the standard error, and `/dev/stdout', + which writes to the standard output.(1) + + The file will be created (or truncated) before the first input + line is read; all `w' commands (including instances of the `w' flag + on successful `s' commands) which refer to the same FILENAME are + output without closing and reopening the file. + +`D' + If pattern space contains no newline, start a normal new cycle as + if the `d' command was issued. Otherwise, delete text in the + pattern space up to the first newline, and restart cycle with the + resultant pattern space, without reading a new line of input. + +`N' + Add a newline to the pattern space, then append the next line of + input to the pattern space. If there is no more input then `sed' + exits without processing any more commands. + +`P' + Print out the portion of the pattern space up to the first newline. + +`h' + Replace the contents of the hold space with the contents of the + pattern space. + +`H' + Append a newline to the contents of the hold space, and then + append the contents of the pattern space to that of the hold space. + +`g' + Replace the contents of the pattern space with the contents of the + hold space. + +`G' + Append a newline to the contents of the pattern space, and then + append the contents of the hold space to that of the pattern space. + +`x' + Exchange the contents of the hold and pattern spaces. 
+ + + ---------- Footnotes ---------- + + (1) This is equivalent to `p' unless the `-i' option is being used. + + +File: sed.info, Node: Programming Commands, Next: Extended Commands, Prev: Other Commands, Up: sed Programs + +3.7 Commands for `sed' gurus +============================ + +In most cases, use of these commands indicates that you are probably +better off programming in something like `awk' or Perl. But +occasionally one is committed to sticking with `sed', and these +commands can enable one to write quite convoluted scripts. + +`: LABEL' + [No addresses allowed.] + + Specify the location of LABEL for branch commands. In all other + respects, a no-op. + +`b LABEL' + Unconditionally branch to LABEL. The LABEL may be omitted, in + which case the next cycle is started. + +`t LABEL' + Branch to LABEL only if there has been a successful `s'ubstitution + since the last input line was read or conditional branch was taken. + The LABEL may be omitted, in which case the next cycle is started. + + + +File: sed.info, Node: Extended Commands, Next: Escapes, Prev: Programming Commands, Up: sed Programs + +3.8 Commands Specific to GNU `sed' +================================== + +These commands are specific to GNU `sed', so you must use them with +care and only when you are sure that hindering portability is not evil. +They allow you to check for GNU `sed' extensions or to do tasks that +are required quite often, yet are unsupported by standard `sed's. + +`e [COMMAND]' + This command allows one to pipe input from a shell command into + pattern space. Without parameters, the `e' command executes the + command that is found in pattern space and replaces the pattern + space with the output; a trailing newline is suppressed. + + If a parameter is specified, instead, the `e' command interprets + it as a command and sends its output to the output stream. The + command can run across multiple lines, all but the last ending with + a back-slash. 
+ + In both cases, the results are undefined if the command to be + executed contains a NUL character. + + Note that, unlike the `r' command, the output of the command will + be printed immediately; the `r' command instead delays the output + to the end of the current cycle. + +`F' + Print out the file name of the current input file (with a trailing + newline). + +`L N' + This GNU `sed' extension fills and joins lines in pattern space to + produce output lines of (at most) N characters, like `fmt' does; + if N is omitted, the default as specified on the command line is + used. This command is considered a failed experiment and unless + there is enough request (which seems unlikely) will be removed in + future versions. + +`Q [EXIT-CODE]' + This command only accepts a single address. + + This command is the same as `q', but will not print the contents + of pattern space. Like `q', it provides the ability to return an + exit code to the caller. + + This command can be useful because the only alternative ways to + accomplish this apparently trivial function are to use the `-n' + option (which can unnecessarily complicate your script) or + resorting to the following snippet, which wastes time by reading + the whole file without any visible effect: + + :eat + $d Quit silently on the last line + N Read another line, silently + g Overwrite pattern space each time to save memory + b eat + +`R FILENAME' + Queue a line of FILENAME to be read and inserted into the output + stream at the end of the current cycle, or when the next input + line is read. Note that if FILENAME cannot be read, or if its end + is reached, no line is appended, without any error indication. + + As with the `r' command, the special value `/dev/stdin' is + supported for the file name, which reads a line from the standard + input. + +`T LABEL' + Branch to LABEL only if there have been no successful + `s'ubstitutions since the last input line was read or conditional + branch was taken. 
The LABEL may be omitted, in which case the next + cycle is started. + +`v VERSION' + This command does nothing, but makes `sed' fail if GNU `sed' + extensions are not supported, simply because other versions of + `sed' do not implement it. In addition, you can specify the + version of `sed' that your script requires, such as `4.0.5'. The + default is `4.0' because that is the first version that + implemented this command. + + This command enables all GNU extensions even if `POSIXLY_CORRECT' + is set in the environment. + +`W FILENAME' + Write to the given filename the portion of the pattern space up to + the first newline. Everything said under the `w' command about + file handling holds here too. + +`z' + This command empties the content of pattern space. It is usually + the same as `s/.*//', but is more efficient and works in the + presence of invalid multibyte sequences in the input stream. + POSIX mandates that such sequences are _not_ matched by `.', so + that there is no portable way to clear `sed''s buffers in the + middle of the script in most multibyte locales (including UTF-8 + locales). + + +File: sed.info, Node: Escapes, Prev: Extended Commands, Up: sed Programs + +3.9 GNU Extensions for Escapes in Regular Expressions +===================================================== + +Until this chapter, we have only encountered escapes of the form `\^', +which tell `sed' not to interpret the circumflex as a special +character, but rather to take it literally. For example, `\*' matches +a single asterisk rather than zero or more backslashes. + + This chapter introduces another kind of escape(1)--that is, escapes +that are applied to a character or sequence of characters that +ordinarily are taken literally, and that `sed' replaces with a special +character. This provides a way of encoding non-printable characters in +patterns in a visible manner. 
There is no restriction on the +appearance of non-printing characters in a `sed' script but when a +script is being prepared in the shell or by text editing, it is usually +easier to use one of the following escape sequences than the binary +character it represents: + + The list of these escapes is: + +`\a' + Produces or matches a BEL character, that is an "alert" (ASCII 7). + +`\f' + Produces or matches a form feed (ASCII 12). + +`\n' + Produces or matches a newline (ASCII 10). + +`\r' + Produces or matches a carriage return (ASCII 13). + +`\t' + Produces or matches a horizontal tab (ASCII 9). + +`\v' + Produces or matches a so called "vertical tab" (ASCII 11). + +`\cX' + Produces or matches `CONTROL-X', where X is any character. The + precise effect of `\cX' is as follows: if X is a lower case + letter, it is converted to upper case. Then bit 6 of the + character (hex 40) is inverted. Thus `\cz' becomes hex 1A, but + `\c{' becomes hex 3B, while `\c;' becomes hex 7B. + +`\dXXX' + Produces or matches a character whose decimal ASCII value is XXX. + +`\oXXX' + Produces or matches a character whose octal ASCII value is XXX. + +`\xXX' + Produces or matches a character whose hexadecimal ASCII value is + XX. + + `\b' (backspace) was omitted because of the conflict with the +existing "word boundary" meaning. + + Other escapes match a particular character class and are valid only +in regular expressions: + +`\w' + Matches any "word" character. A "word" character is any letter or + digit or the underscore character. + +`\W' + Matches any "non-word" character. + +`\b' + Matches a word boundary; that is it matches if the character to + the left is a "word" character and the character to the right is a + "non-word" character, or vice-versa. + +`\B' + Matches everywhere but on a word boundary; that is it matches if + the character to the left and the character to the right are + either both "word" characters or both "non-word" characters. 
+ +`\`' + Matches only at the start of pattern space. This is different + from `^' in multi-line mode. + +`\'' + Matches only at the end of pattern space. This is different from + `$' in multi-line mode. + + + ---------- Footnotes ---------- + + (1) All the escapes introduced here are GNU extensions, with the +exception of `\n'. In basic regular expression mode, setting +`POSIXLY_CORRECT' disables them inside bracket expressions. + + +File: sed.info, Node: Examples, Next: Limitations, Prev: sed Programs, Up: Top + +4 Some Sample Scripts +********************* + +Here are some `sed' scripts to guide you in the art of mastering `sed'. + +* Menu: + +Some exotic examples: +* Centering lines:: +* Increment a number:: +* Rename files to lower case:: +* Print bash environment:: +* Reverse chars of lines:: + +Emulating standard utilities: +* tac:: Reverse lines of files +* cat -n:: Numbering lines +* cat -b:: Numbering non-blank lines +* wc -c:: Counting chars +* wc -w:: Counting words +* wc -l:: Counting lines +* head:: Printing the first lines +* tail:: Printing the last lines +* uniq:: Make duplicate lines unique +* uniq -d:: Print duplicated lines of input +* uniq -u:: Remove all duplicated lines +* cat -s:: Squeezing blank lines + + +File: sed.info, Node: Centering lines, Next: Increment a number, Up: Examples + +4.1 Centering Lines +=================== + +This script centers all lines of a file to an 80-column width. To +change that width, the number in `\{...\}' must be replaced, and the +number of added spaces also must be changed. + + Note how the buffer commands are used to separate parts in the +regular expressions to be matched--this is a common technique.
+ + #!/usr/bin/sed -f + + # Put 80 spaces in the buffer + 1 { + x + s/^$/ / + s/^.*$/&&&&&&&&/ + x + } + + # del leading and trailing spaces + y/tab/ / + s/^ *// + s/ *$// + + # add a newline and 80 spaces to end of line + G + + # keep first 81 chars (80 + a newline) + s/^\(.\{81\}\).*$/\1/ + + # \2 matches half of the spaces, which are moved to the beginning + s/^\(.*\)\n\(.*\)\2/\2\1/ + + +File: sed.info, Node: Increment a number, Next: Rename files to lower case, Prev: Centering lines, Up: Examples + +4.2 Increment a Number +====================== + +This script is one of a few that demonstrate how to do arithmetic in +`sed'. This is indeed possible,(1) but must be done manually. + + To increment one number you just add 1 to last digit, replacing it +by the following digit. There is one exception: when the digit is a +nine the previous digits must be also incremented until you don't have +a nine. + + This solution by Bruno Haible is very clever and smart because it +uses a single buffer; if you don't have this limitation, the algorithm +used in *note Numbering lines: cat -n, is faster. It works by +replacing trailing nines with an underscore, then using multiple `s' +commands to increment the last digit, and then again substituting +underscores with zeros. + + #!/usr/bin/sed -f + + /[^0-9]/ d + + # replace all trailing 9s by _ (any other character except digits, could + # be used) + :d + s/9\(_*\)$/_\1/ + td + + # incr last digit only. The first line adds a most-significant + # digit of 1 if we have to add a digit. + + s/^\(_*\)$/1\1/; tn + s/8\(_*\)$/9\1/; tn + s/7\(_*\)$/8\1/; tn + s/6\(_*\)$/7\1/; tn + s/5\(_*\)$/6\1/; tn + s/4\(_*\)$/5\1/; tn + s/3\(_*\)$/4\1/; tn + s/2\(_*\)$/3\1/; tn + s/1\(_*\)$/2\1/; tn + s/0\(_*\)$/1\1/; tn + + :n + y/_/0/ + + ---------- Footnotes ---------- + + (1) `sed' guru Greg Ubben wrote an implementation of the `dc' RPN +calculator! It is distributed together with sed. 
+ + +File: sed.info, Node: Rename files to lower case, Next: Print bash environment, Prev: Increment a number, Up: Examples + +4.3 Rename Files to Lower Case +============================== + +This is a pretty strange use of `sed'. We transform text, and +transform it to be shell commands, then just feed them to shell. Don't +worry, even worse hacks are done when using `sed'; I have seen a script +converting the output of `date' into a `bc' program! + + The main body of this is the `sed' script, which remaps the name +from lower to upper (or vice-versa) and even checks out if the remapped +name is the same as the original name. Note how the script is +parameterized using shell variables and proper quoting. + + #! /bin/sh + # rename files to lower/upper case... + # + # usage: + # move-to-lower * + # move-to-upper * + # or + # move-to-lower -R . + # move-to-upper -R . + # + + help() + { + cat << eof + Usage: $0 [-n] [-r] [-h] files... + + -n do nothing, only see what would be done + -R recursive (use find) + -h this message + files files to remap to lower case + + Examples: + $0 -n * (see if everything is ok, then...) + $0 * + + $0 -R . + + eof + } + + apply_cmd='sh' + finder='echo "$@" | tr " " "\n"' + files_only= + + while : + do + case "$1" in + -n) apply_cmd='cat' ;; + -R) finder='find "$@" -type f';; + -h) help ; exit 1 ;; + *) break ;; + esac + shift + done + + if [ -z "$1" ]; then + echo Usage: $0 [-h] [-n] [-r] files... + exit 1 + fi + + LOWER='abcdefghijklmnopqrstuvwxyz' + UPPER='ABCDEFGHIJKLMNOPQRSTUVWXYZ' + + case `basename $0` in + *upper*) TO=$UPPER; FROM=$LOWER ;; + *) FROM=$UPPER; TO=$LOWER ;; + esac + + eval $finder | sed -n ' + + # remove all trailing slashes + s/\/*$// + + # add ./ if there is no path, only a filename + /\//! 
s/^/.\// + + # save path+filename + h + + # remove path + s/.*\/// + + # do conversion only on filename + y/'$FROM'/'$TO'/ + + # now line contains original path+file, while + # hold space contains the new filename + x + + # add converted file name to line, which now contains + # path/file-name\nconverted-file-name + G + + # check if converted file name is equal to original file name, + # if it is, do not print anything + /^.*\/\(.*\)\n\1/b + + # escape special characters for the shell + s/["$`\\]/\\&/g + + # now, transform path/fromfile\n, into + # mv path/fromfile path/tofile and print it + s/^\(.*\/\)\(.*\)\n\(.*\)$/mv "\1\2" "\1\3"/p + + ' | $apply_cmd + + +File: sed.info, Node: Print bash environment, Next: Reverse chars of lines, Prev: Rename files to lower case, Up: Examples + +4.4 Print `bash' Environment +============================ + +This script strips the definition of the shell functions from the +output of the `set' Bourne-shell command. + + #!/bin/sh + + set | sed -n ' + :x + + # if no occurrence of "=()" print and load next line + /=()/! { p; b; } + / () $/! { p; b; } + + # possible start of functions section + # save the line in case this is a var like FOO="() " + h + + # if the next line has a brace, we quit because + # nothing comes after functions + n + /^{/ q + + # print the old line + x; p + + # work on the new line now + x; bx + ' + + +File: sed.info, Node: Reverse chars of lines, Next: tac, Prev: Print bash environment, Up: Examples + +4.5 Reverse Characters of Lines +=============================== + +This script can be used to reverse the position of characters in lines. +The technique moves two characters at a time, hence it is faster than +more intuitive implementations. + + Note the `tx' command before the definition of the label. This is +often needed to reset the flag that is tested by the `t' command. + + Imaginative readers will find uses for this script. 
An example is +reversing the output of `banner'.(1) + + #!/usr/bin/sed -f + + /../! b + + # Reverse a line. Begin embedding the line between two newlines + s/^.*$/\ + &\ + / + + # Move first character at the end. The regexp matches until + # there are zero or one characters between the markers + tx + :x + s/\(\n.\)\(.*\)\(.\n\)/\3\2\1/ + tx + + # Remove the newline markers + s/\n//g + + ---------- Footnotes ---------- + + (1) This requires another script to pad the output of banner; for +example + + #! /bin/sh + + banner -w $1 $2 $3 $4 | + sed -e :a -e '/^.\{0,'$1'\}$/ { s/$/ /; ba; }' | + ~/sedscripts/reverseline.sed + + +File: sed.info, Node: tac, Next: cat -n, Prev: Reverse chars of lines, Up: Examples + +4.6 Reverse Lines of Files +========================== + +This one begins a series of totally useless (yet interesting) scripts +emulating various Unix commands. This, in particular, is a `tac' +workalike. + + Note that on implementations other than GNU `sed' this script might +easily overflow internal buffers. + + #!/usr/bin/sed -nf + + # reverse all lines of input, i.e. first line became last, ... + + # from the second line, the buffer (which contains all previous lines) + # is *appended* to current line, so, the order will be reversed + 1! G + + # on the last line we're done -- print everything + $ p + + # store everything on the buffer again + h + + +File: sed.info, Node: cat -n, Next: cat -b, Prev: tac, Up: Examples + +4.7 Numbering Lines +=================== + +This script replaces `cat -n'; in fact it formats its output exactly +like GNU `cat' does. + + Of course this is completely useless and for two reasons: first, +because somebody else did it in C, second, because the following +Bourne-shell script could be used for the same purpose and would be +much faster: + + #! /bin/sh + sed -e "=" $@ | sed -e ' + s/^/ / + N + s/^ *\(......\)\n/\1 / + ' + + It uses `sed' to print the line number, then groups lines two by two +using `N'. 
Of course, this script does not teach as much as the one +presented below. + + The algorithm used for incrementing uses both buffers, so the line +is printed as soon as possible and then discarded. The number is split +so that changing digits go in a buffer and unchanged ones go in the +other; the changed digits are modified in a single step (using a `y' +command). The line number for the next line is then composed and +stored in the hold space, to be used in the next iteration. + + #!/usr/bin/sed -nf + + # Prime the pump on the first line + x + /^$/ s/^.*$/1/ + + # Add the correct line number before the pattern + G + h + + # Format it and print it + s/^/ / + s/^ *\(......\)\n/\1 /p + + # Get the line number from hold space; add a zero + # if we're going to add a digit on the next line + g + s/\n.*$// + /^9*$/ s/^/0/ + + # separate changing/unchanged digits with an x + s/.9*$/x&/ + + # keep changing digits in hold space + h + s/^.*x// + y/0123456789/1234567890/ + x + + # keep unchanged digits in pattern space + s/x.*$// + + # compose the new number, remove the newline implicitly added by G + G + s/\n// + h + + +File: sed.info, Node: cat -b, Next: wc -c, Prev: cat -n, Up: Examples + +4.8 Numbering Non-blank Lines +============================= + +Emulating `cat -b' is almost the same as `cat -n'--we only have to +select which lines are to be numbered and which are not. + + The part that is common to this script and the previous one is not +commented to show how important it is to comment `sed' scripts +properly... + + #!/usr/bin/sed -nf + + /^$/ { + p + b + } + + # Same as cat -n from now + x + /^$/ s/^.*$/1/ + G + h + s/^/ / + s/^ *\(......\)\n/\1 /p + x + s/\n.*$// + /^9*$/ s/^/0/ + s/.9*$/x&/ + h + s/^.*x// + y/0123456789/1234567890/ + x + s/x.*$// + G + s/\n// + h + + +File: sed.info, Node: wc -c, Next: wc -w, Prev: cat -b, Up: Examples + +4.9 Counting Characters +======================= + +This script shows another way to do arithmetic with `sed'. 
In this +case we have to add possibly large numbers, so implementing this by +successive increments would not be feasible (and possibly even more +complicated to contrive than this script). + + The approach is to map numbers to letters, kind of an abacus +implemented with `sed'. `a's are units, `b's are tens and so on: we +simply add the number of characters on the current line as units, and +then propagate the carry to tens, hundreds, and so on. + + As usual, running totals are kept in hold space. + + On the last line, we convert the abacus form back to decimal. For +the sake of variety, this is done with a loop rather than with some 80 +`s' commands(1): first we convert units, removing `a's from the number; +then we rotate letters so that tens become `a's, and so on until no +more letters remain. + + #!/usr/bin/sed -nf + + # Add n+1 a's to hold space (+1 is for the newline) + s/./a/g + H + x + s/\n/a/ + + # Do the carry. The t's and b's are not necessary, + # but they do speed up the thing + t a + : a; s/aaaaaaaaaa/b/g; t b; b done + : b; s/bbbbbbbbbb/c/g; t c; b done + : c; s/cccccccccc/d/g; t d; b done + : d; s/dddddddddd/e/g; t e; b done + : e; s/eeeeeeeeee/f/g; t f; b done + : f; s/ffffffffff/g/g; t g; b done + : g; s/gggggggggg/h/g; t h; b done + : h; s/hhhhhhhhhh//g + + : done + $! { + h + b + } + + # On the last line, convert back to decimal + + : loop + /a/! s/[b-h]*/&0/ + s/aaaaaaaaa/9/ + s/aaaaaaaa/8/ + s/aaaaaaa/7/ + s/aaaaaa/6/ + s/aaaaa/5/ + s/aaaa/4/ + s/aaa/3/ + s/aa/2/ + s/a/1/ + + : next + y/bcdefgh/abcdefg/ + /[a-h]/ b loop + p + + ---------- Footnotes ---------- + + (1) Some implementations have a limit of 199 commands per script + + +File: sed.info, Node: wc -w, Next: wc -l, Prev: wc -c, Up: Examples + +4.10 Counting Words +=================== + +This script is almost the same as the previous one, once each of the +words on the line is converted to a single `a' (in the previous script +each letter was changed to an `a'). 
+ + It is interesting that real `wc' programs have optimized loops for +`wc -c', so they are much slower at counting words rather than +characters. This script's bottleneck, instead, is arithmetic, and +hence the word-counting one is faster (it has to manage smaller +numbers). + + Again, the common parts are not commented to show the importance of +commenting `sed' scripts. + + #!/usr/bin/sed -nf + + # Convert words to a's + s/[ tab][ tab]*/ /g + s/^/ / + s/ [^ ][^ ]*/a /g + s/ //g + + # Append them to hold space + H + x + s/\n// + + # From here on it is the same as in wc -c. + /aaaaaaaaaa/! bx; s/aaaaaaaaaa/b/g + /bbbbbbbbbb/! bx; s/bbbbbbbbbb/c/g + /cccccccccc/! bx; s/cccccccccc/d/g + /dddddddddd/! bx; s/dddddddddd/e/g + /eeeeeeeeee/! bx; s/eeeeeeeeee/f/g + /ffffffffff/! bx; s/ffffffffff/g/g + /gggggggggg/! bx; s/gggggggggg/h/g + s/hhhhhhhhhh//g + :x + $! { h; b; } + :y + /a/! s/[b-h]*/&0/ + s/aaaaaaaaa/9/ + s/aaaaaaaa/8/ + s/aaaaaaa/7/ + s/aaaaaa/6/ + s/aaaaa/5/ + s/aaaa/4/ + s/aaa/3/ + s/aa/2/ + s/a/1/ + y/bcdefgh/abcdefg/ + /[a-h]/ by + p + + +File: sed.info, Node: wc -l, Next: head, Prev: wc -w, Up: Examples + +4.11 Counting Lines +=================== + +No strange things are done now, because `sed' gives us `wc -l' +functionality for free!!! Look: + + #!/usr/bin/sed -nf + $= + + +File: sed.info, Node: head, Next: tail, Prev: wc -l, Up: Examples + +4.12 Printing the First Lines +============================= + +This script is probably the simplest useful `sed' script. It displays +the first 10 lines of input; the number of displayed lines is right +before the `q' command. + + #!/usr/bin/sed -f + 10q + + +File: sed.info, Node: tail, Next: uniq, Prev: head, Up: Examples + +4.13 Printing the Last Lines +============================ + +Printing the last N lines rather than the first is more complex but +indeed possible. N is encoded in the second line, before the bang +character. 
+ + This script is similar to the `tac' script in that it keeps the +final output in the hold space and prints it at the end: + + #!/usr/bin/sed -nf + + 1! {; H; g; } + 1,10 !s/[^\n]*\n// + $p + h + + Mainly, the script keeps a window of 10 lines and slides it by +adding a line and deleting the oldest (the substitution command on the +second line works like a `D' command but does not restart the loop). + + The "sliding window" technique is a very powerful way to write +efficient and complex `sed' scripts, because commands like `P' would +require a lot of work if implemented manually. + + To introduce the technique, which is fully demonstrated in the rest +of this chapter and is based on the `N', `P' and `D' commands, here is +an implementation of `tail' using a simple "sliding window." + + This looks complicated but in fact the working is the same as the +last script: after we have kicked in the appropriate number of lines, +however, we stop using the hold space to keep inter-line state, and +instead use `N' and `D' to slide pattern space by one line: + + #!/usr/bin/sed -f + + 1h + 2,10 {; H; g; } + $q + 1,9d + N + D + + Note how the first, second and fourth line are inactive after the +first ten lines of input. After that, all the script does is: exiting +on the last line of input, appending the next input line to pattern +space, and removing the first line. + + +File: sed.info, Node: uniq, Next: uniq -d, Prev: tail, Up: Examples + +4.14 Make Duplicate Lines Unique +================================ + +This is an example of the art of using the `N', `P' and `D' commands, +probably the most difficult to master. + + #!/usr/bin/sed -f + h + + :b + # On the last line, print and exit + $b + N + /^\(.*\)\n\1$/ { + # The two lines are identical. Undo the effect of + # the n command. + g + bb + } + + # If the `N' command had added the last line, print and exit + $b + + # The lines are different; print the first and go + # back working on the second. 
+ P + D + + As you can see, we mantain a 2-line window using `P' and `D'. This +technique is often used in advanced `sed' scripts. + + +File: sed.info, Node: uniq -d, Next: uniq -u, Prev: uniq, Up: Examples + +4.15 Print Duplicated Lines of Input +==================================== + +This script prints only duplicated lines, like `uniq -d'. + + #!/usr/bin/sed -nf + + $b + N + /^\(.*\)\n\1$/ { + # Print the first of the duplicated lines + s/.*\n// + p + + # Loop until we get a different line + :b + $b + N + /^\(.*\)\n\1$/ { + s/.*\n// + bb + } + } + + # The last line cannot be followed by duplicates + $b + + # Found a different one. Leave it alone in the pattern space + # and go back to the top, hunting its duplicates + D + + +File: sed.info, Node: uniq -u, Next: cat -s, Prev: uniq -d, Up: Examples + +4.16 Remove All Duplicated Lines +================================ + +This script prints only unique lines, like `uniq -u'. + + #!/usr/bin/sed -f + + # Search for a duplicate line --- until that, print what you find. + $b + N + /^\(.*\)\n\1$/ ! { + P + D + } + + :c + # Got two equal lines in pattern space. At the + # end of the file we simply exit + $d + + # Else, we keep reading lines with `N' until we + # find a different one + s/.*\n// + N + /^\(.*\)\n\1$/ { + bc + } + + # Remove the last instance of the duplicate line + # and go back to the top + D + + +File: sed.info, Node: cat -s, Prev: uniq -u, Up: Examples + +4.17 Squeezing Blank Lines +========================== + +As a final example, here are three scripts, of increasing complexity +and speed, that implement the same function as `cat -s', that is +squeezing blank lines. + + The first leaves a blank line at the beginning and end if there are +some already. 
+ + #!/usr/bin/sed -f + + # on empty lines, join with next + # Note there is a star in the regexp + :x + /^\n*$/ { + N + bx + } + + # now, squeeze all '\n', this can be also done by: + # s/^\(\n\)*/\1/ + s/\n*/\ + / + + This one is a bit more complex and removes all empty lines at the +beginning. It does leave a single blank line at end if one was there. + + #!/usr/bin/sed -f + + # delete all leading empty lines + 1,/^./{ + /./!d + } + + # on an empty line we remove it and all the following + # empty lines, but one + :x + /./!{ + N + s/^\n$// + tx + } + + This removes leading and trailing blank lines. It is also the +fastest. Note that loops are completely done with `n' and `b', without +relying on `sed' to restart the script automatically at the end of +a line. + + #!/usr/bin/sed -nf + + # delete all (leading) blanks + /./!d + + # get here: so there is a non empty + :x + # print it + p + # get next + n + # got chars? print it again, etc... + /./bx + + # no, don't have chars: got an empty line + :z + # get next, if last line we finish here so no trailing + # empty lines are written + n + # also empty? then ignore it, and get next... this will + # remove ALL empty lines + /./!bz + + # all empty lines were deleted/ignored, but we have a non empty. As + # what we want to do is to squeeze, insert a blank line artificially + i\ + + bx + + +File: sed.info, Node: Limitations, Next: Other Resources, Prev: Examples, Up: Top + +5 GNU `sed''s Limitations and Non-limitations +********************************************* + +For those who want to write portable `sed' scripts, be aware that some +implementations have been known to limit line lengths (for the pattern +and hold spaces) to be no more than 4000 bytes. The POSIX standard +specifies that conforming `sed' implementations shall support at least +8192 byte line lengths. 
GNU `sed' has no built-in limit on line length; +as long as it can `malloc()' more (virtual) memory, you can feed or +construct lines as long as you like. + + However, recursion is used to handle subpatterns and indefinite +repetition. This means that the available stack space may limit the +size of the buffer that can be processed by certain patterns. + + +File: sed.info, Node: Other Resources, Next: Reporting Bugs, Prev: Limitations, Up: Top + +6 Other Resources for Learning About `sed' +****************************************** + +In addition to several books that have been written about `sed' (either +specifically or as chapters in books which discuss shell programming), +one can find out more about `sed' (including suggestions of a few +books) from the FAQ for the `sed-users' mailing list, available from: + `http://sed.sourceforge.net/sedfaq.html' + + Also of interest are +`http://www.student.northpark.edu/pemente/sed/index.htm' and +`http://sed.sf.net/grabbag', which include `sed' tutorials and other +`sed'-related goodies. + + The `sed-users' mailing list itself is maintained by Sven Guckes. To +subscribe, visit `http://groups.yahoo.com' and search for the +`sed-users' mailing list. + + +File: sed.info, Node: Reporting Bugs, Next: Extended regexps, Prev: Other Resources, Up: Top + +7 Reporting Bugs +**************** + +Email bug reports to <bug-sed@gnu.org>. Also, please include the +output of `sed --version' in the body of your report if at all possible. + + Please do not send a bug report like this: + + while building frobme-1.3.4 + $ configure + error--> sed: file sedscr line 1: Unknown option to 's' + + If GNU `sed' doesn't configure your favorite package, take a few +extra minutes to identify the specific problem and make a stand-alone +test case. Unlike other programs such as C compilers, making such test +cases for `sed' is quite simple. 
+ + A stand-alone test case includes all the data necessary to perform +the test, and the specific invocation of `sed' that causes the problem. +The smaller a stand-alone test case is, the better. A test case should +not involve something as far removed from `sed' as "try to configure +frobme-1.3.4". Yes, that is in principle enough information to look +for the bug, but that is not a very practical prospect. + + Here are a few commonly reported bugs that are not bugs. + +`N' command on the last line + Most versions of `sed' exit without printing anything when the `N' + command is issued on the last line of a file. GNU `sed' prints + pattern space before exiting unless of course the `-n' command + switch has been specified. This choice is by design. + + For example, the behavior of + sed N foo bar + would depend on whether foo has an even or an odd number of + lines(1). Or, when writing a script to read the next few lines + following a pattern match, traditional implementations of `sed' + would force you to write something like + /foo/{ $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N } + instead of just + /foo/{ N;N;N;N;N;N;N;N;N; } + + In any case, the simplest workaround is to use `$d;N' in scripts + that rely on the traditional behavior, or to set the + `POSIXLY_CORRECT' variable to a non-empty value. + +Regex syntax clashes (problems with backslashes) + `sed' uses the POSIX basic regular expression syntax. According to + the standard, the meaning of some escape sequences is undefined in + this syntax; notable in the case of `sed' are `\|', `\+', `\?', + `\`', `\'', `\<', `\>', `\b', `\B', `\w', and `\W'. + + As in all GNU programs that use POSIX basic regular expressions, + `sed' interprets these escape sequences as special characters. + So, `x\+' matches one or more occurrences of `x'. `abc\|def' + matches either `abc' or `def'. + + This syntax may cause problems when running scripts written for + other `sed's. 
Some `sed' programs have been written with the + assumption that `\|' and `\+' match the literal characters `|' and + `+'. Such scripts must be modified by removing the spurious + backslashes if they are to be used with modern implementations of + `sed', like GNU `sed'. + + On the other hand, some scripts use s|abc\|def||g to remove + occurrences of _either_ `abc' or `def'. While this worked until + `sed' 4.0.x, newer versions interpret this as removing the string + `abc|def'. This is again undefined behavior according to POSIX, + and this interpretation is arguably more robust: older `sed's, for + example, required that the regex matcher parsed `\/' as `/' in the + common case of escaping a slash, which is again undefined + behavior; the new behavior avoids this, and this is good because + the regex matcher is only partially under our control. + + In addition, this version of `sed' supports several escape + characters (some of which are multi-character) to insert + non-printable characters in scripts (`\a', `\c', `\d', `\o', `\r', + `\t', `\v', `\x'). These can cause similar problems with scripts + written for other `sed's. + +`-i' clobbers read-only files + In short, `sed -i' will let you delete the contents of a read-only + file, and in general the `-i' option (*note Invocation: Invoking + sed.) lets you clobber protected files. This is not a bug, but + rather a consequence of how the Unix filesystem works. + + The permissions on a file say what can happen to the data in that + file, while the permissions on a directory say what can happen to + the list of files in that directory. `sed -i' will not ever open + for writing a file that is already on disk. Rather, it will work + on a temporary file that is finally renamed to the original name: + if you rename or delete files, you're actually modifying the + contents of the directory, so the operation depends on the + permissions of the directory, not of the file. 
For this same + reason, `sed' does not let you use `-i' on a writeable file in a + read-only directory, and will break hard or symbolic links when + `-i' is used on such a file. + +`0a' does not work (gives an error) + There is no line 0. 0 is a special address that is only used to + treat addresses like `0,/RE/' as active when the script starts: if + you write `1,/abc/d' and the first line includes the word `abc', + then that match would be ignored because address ranges must span + at least two lines (barring the end of the file); but what you + probably wanted is to delete every line up to the first one + including `abc', and this is obtained with `0,/abc/d'. + +`[a-z]' is case insensitive + You are encountering problems with locales. POSIX mandates that + `[a-z]' uses the current locale's collation order - in C parlance, + that means using `strcoll(3)' instead of `strcmp(3)'. Some + locales have a case-insensitive collation order, others don't. + + Another problem is that `[a-z]' tries to use collation symbols. + This only happens if you are on the GNU system, using GNU libc's + regular expression matcher instead of compiling the one supplied + with GNU sed. In a Danish locale, for example, the regular + expression `^[a-z]$' matches the string `aa', because this is a + single collating symbol that comes after `a' and before `b'; `ll' + behaves similarly in Spanish locales, or `ij' in Dutch locales. + + To work around these problems, which may cause bugs in shell + scripts, set the `LC_COLLATE' and `LC_CTYPE' environment variables + to `C'. + +`s/.*//' does not clear pattern space + This happens if your input stream includes invalid multibyte + sequences. POSIX mandates that such sequences are _not_ matched + by `.', so that `s/.*//' will not clear pattern space as you would + expect. In fact, there is no way to clear sed's buffers in the + middle of the script in most multibyte locales (including UTF-8 + locales). 
For this reason, GNU `sed' provides a `z' command (for + `zap') as an extension. + + To work around these problems, which may cause bugs in shell + scripts, set the `LC_COLLATE' and `LC_CTYPE' environment variables + to `C'. + + ---------- Footnotes ---------- + + (1) which is the actual "bug" that prompted the change in behavior + + +File: sed.info, Node: Extended regexps, Next: Concept Index, Prev: Reporting Bugs, Up: Top + +Appendix A Extended regular expressions +*************************************** + +The only difference between basic and extended regular expressions is in +the behavior of a few characters: `?', `+', parentheses, braces (`{}'), +and `|'. While basic regular expressions require these to be escaped +if you want them to behave as special characters, when using extended +regular expressions you must escape them if you want them _to match a +literal character_. `|' is special here because `\|' is a GNU +extension - standard basic regular expressions do not provide its +functionality. + +Examples: +`abc?' + becomes `abc\?' when using extended regular expressions. It + matches the literal string `abc?'. + +`c\+' + becomes `c+' when using extended regular expressions. It matches + one or more `c's. + +`a\{3,\}' + becomes `a{3,}' when using extended regular expressions. It + matches three or more `a's. + +`\(abc\)\{2,3\}' + becomes `(abc){2,3}' when using extended regular expressions. It + matches either `abcabc' or `abcabcabc'. + +`\(abc*\)\1' + becomes `(abc*)\1' when using extended regular expressions. + Backreferences must still be escaped when using extended regular + expressions. + + +File: sed.info, Node: Concept Index, Next: Command and Option Index, Prev: Extended regexps, Up: Top + +Concept Index +************* + +This is a general index of all issues discussed in this manual, with the +exception of the `sed' commands and command-line options. + + +* Menu: + +* 0 address: Reporting Bugs. 
(line 102) +* Additional reading about sed: Other Resources. (line 6) +* ADDR1,+N: Addresses. (line 80) +* ADDR1,~N: Addresses. (line 80) +* Address, as a regular expression: Addresses. (line 27) +* Address, last line: Addresses. (line 22) +* Address, numeric: Addresses. (line 8) +* Addresses, in sed scripts: Addresses. (line 6) +* Append hold space to pattern space: Other Commands. (line 125) +* Append next input line to pattern space: Other Commands. (line 105) +* Append pattern space to hold space: Other Commands. (line 117) +* Appending text after a line: Other Commands. (line 27) +* Backreferences, in regular expressions: The "s" Command. (line 19) +* Branch to a label, if s/// failed: Extended Commands. (line 71) +* Branch to a label, if s/// succeeded: Programming Commands. + (line 22) +* Branch to a label, unconditionally: Programming Commands. + (line 18) +* Buffer spaces, pattern and hold: Execution Cycle. (line 6) +* Bugs, reporting: Reporting Bugs. (line 6) +* Case-insensitive matching: The "s" Command. (line 112) +* Caveat -- #n on first line: Common Commands. (line 20) +* Command groups: Common Commands. (line 50) +* Comments, in scripts: Common Commands. (line 12) +* Conditional branch <1>: Extended Commands. (line 71) +* Conditional branch: Programming Commands. + (line 22) +* Copy hold space into pattern space: Other Commands. (line 121) +* Copy pattern space into hold space: Other Commands. (line 113) +* Delete first line from pattern space: Other Commands. (line 99) +* Disabling autoprint, from command line: Invoking sed. (line 34) +* empty regular expression: Addresses. (line 31) +* Emptying pattern space <1>: Reporting Bugs. (line 129) +* Emptying pattern space: Extended Commands. (line 93) +* Evaluate Bourne-shell commands: Extended Commands. (line 12) +* Evaluate Bourne-shell commands, after substitution: The "s" Command. + (line 103) +* Exchange hold space with pattern space: Other Commands. (line 129) +* Excluding lines: Addresses. 
(line 103) +* Extended regular expressions, choosing: Invoking sed. (line 113) +* Extended regular expressions, syntax: Extended regexps. (line 6) +* File name, printing: Extended Commands. (line 30) +* Files to be processed as input: Invoking sed. (line 148) +* Flow of control in scripts: Programming Commands. + (line 11) +* Global substitution: The "s" Command. (line 69) +* GNU extensions, /dev/stderr file <1>: Other Commands. (line 88) +* GNU extensions, /dev/stderr file: The "s" Command. (line 96) +* GNU extensions, /dev/stdin file <1>: Extended Commands. (line 61) +* GNU extensions, /dev/stdin file: Other Commands. (line 78) +* GNU extensions, /dev/stdout file <1>: Other Commands. (line 88) +* GNU extensions, /dev/stdout file <2>: The "s" Command. (line 96) +* GNU extensions, /dev/stdout file: Invoking sed. (line 156) +* GNU extensions, 0 address <1>: Reporting Bugs. (line 102) +* GNU extensions, 0 address: Addresses. (line 80) +* GNU extensions, 0,ADDR2 addressing: Addresses. (line 80) +* GNU extensions, ADDR1,+N addressing: Addresses. (line 80) +* GNU extensions, ADDR1,~N addressing: Addresses. (line 80) +* GNU extensions, branch if s/// failed: Extended Commands. (line 71) +* GNU extensions, case modifiers in s commands: The "s" Command. + (line 23) +* GNU extensions, checking for their presence: Extended Commands. + (line 77) +* GNU extensions, disabling: Invoking sed. (line 81) +* GNU extensions, emptying pattern space <1>: Reporting Bugs. (line 129) +* GNU extensions, emptying pattern space: Extended Commands. (line 93) +* GNU extensions, evaluating Bourne-shell commands <1>: Extended Commands. + (line 12) +* GNU extensions, evaluating Bourne-shell commands: The "s" Command. + (line 103) +* GNU extensions, extended regular expressions: Invoking sed. (line 113) +* GNU extensions, g and NUMBER modifier interaction in s command: The "s" Command. + (line 75) +* GNU extensions, I modifier <1>: The "s" Command. 
(line 112) +* GNU extensions, I modifier: Addresses. (line 49) +* GNU extensions, in-place editing <1>: Reporting Bugs. (line 84) +* GNU extensions, in-place editing: Invoking sed. (line 51) +* GNU extensions, L command: Extended Commands. (line 34) +* GNU extensions, M modifier <1>: The "s" Command. (line 117) +* GNU extensions, M modifier: Addresses. (line 54) +* GNU extensions, modifiers and the empty regular expression: Addresses. + (line 31) +* GNU extensions, N~M addresses: Addresses. (line 13) +* GNU extensions, quitting silently: Extended Commands. (line 44) +* GNU extensions, R command: Extended Commands. (line 61) +* GNU extensions, reading a file a line at a time: Extended Commands. + (line 61) +* GNU extensions, reformatting paragraphs: Extended Commands. (line 34) +* GNU extensions, returning an exit code <1>: Extended Commands. + (line 44) +* GNU extensions, returning an exit code: Common Commands. (line 30) +* GNU extensions, setting line length: Other Commands. (line 65) +* GNU extensions, special escapes <1>: Reporting Bugs. (line 77) +* GNU extensions, special escapes: Escapes. (line 6) +* GNU extensions, special two-address forms: Addresses. (line 80) +* GNU extensions, subprocesses <1>: Extended Commands. (line 12) +* GNU extensions, subprocesses: The "s" Command. (line 103) +* GNU extensions, to basic regular expressions <1>: Reporting Bugs. + (line 50) +* GNU extensions, to basic regular expressions: Regular Expressions. + (line 26) +* GNU extensions, two addresses supported by most commands: Other Commands. + (line 25) +* GNU extensions, unlimited line length: Limitations. (line 6) +* GNU extensions, writing first line to a file: Extended Commands. + (line 88) +* Goto, in scripts: Programming Commands. + (line 18) +* Greedy regular expression matching: Regular Expressions. (line 143) +* Grouping commands: Common Commands. (line 50) +* Hold space, appending from pattern space: Other Commands. 
(line 117) +* Hold space, appending to pattern space: Other Commands. (line 125) +* Hold space, copy into pattern space: Other Commands. (line 121) +* Hold space, copying pattern space into: Other Commands. (line 113) +* Hold space, definition: Execution Cycle. (line 6) +* Hold space, exchange with pattern space: Other Commands. (line 129) +* In-place editing: Reporting Bugs. (line 84) +* In-place editing, activating: Invoking sed. (line 51) +* In-place editing, Perl-style backup file names: Invoking sed. + (line 62) +* Inserting text before a line: Other Commands. (line 46) +* Labels, in scripts: Programming Commands. + (line 14) +* Last line, selecting: Addresses. (line 22) +* Line length, setting <1>: Other Commands. (line 65) +* Line length, setting: Invoking sed. (line 76) +* Line number, printing: Other Commands. (line 62) +* Line selection: Addresses. (line 6) +* Line, selecting by number: Addresses. (line 8) +* Line, selecting by regular expression match: Addresses. (line 27) +* Line, selecting last: Addresses. (line 22) +* List pattern space: Other Commands. (line 65) +* Mixing g and NUMBER modifiers in the s command: The "s" Command. + (line 75) +* Next input line, append to pattern space: Other Commands. (line 105) +* Next input line, replace pattern space with: Common Commands. + (line 44) +* Non-bugs, 0 address: Reporting Bugs. (line 102) +* Non-bugs, in-place editing: Reporting Bugs. (line 84) +* Non-bugs, localization-related: Reporting Bugs. (line 111) +* Non-bugs, N command on the last line: Reporting Bugs. (line 30) +* Non-bugs, regex syntax clashes: Reporting Bugs. (line 50) +* Parenthesized substrings: The "s" Command. (line 19) +* Pattern space, definition: Execution Cycle. (line 6) +* Portability, comments: Common Commands. (line 15) +* Portability, line length limitations: Limitations. (line 6) +* Portability, N command on the last line: Reporting Bugs. (line 30) +* POSIXLY_CORRECT behavior, bracket expressions: Regular Expressions. 
+ (line 105) +* POSIXLY_CORRECT behavior, enabling: Invoking sed. (line 84) +* POSIXLY_CORRECT behavior, escapes: Escapes. (line 11) +* POSIXLY_CORRECT behavior, N command: Reporting Bugs. (line 45) +* Print first line from pattern space: Other Commands. (line 110) +* Printing file name: Extended Commands. (line 30) +* Printing line number: Other Commands. (line 62) +* Printing text unambiguously: Other Commands. (line 65) +* Quitting <1>: Extended Commands. (line 44) +* Quitting: Common Commands. (line 30) +* Range of lines: Addresses. (line 67) +* Range with start address of zero: Addresses. (line 80) +* Read next input line: Common Commands. (line 44) +* Read text from a file <1>: Extended Commands. (line 61) +* Read text from a file: Other Commands. (line 78) +* Reformat pattern space: Extended Commands. (line 34) +* Reformatting paragraphs: Extended Commands. (line 34) +* Replace hold space with copy of pattern space: Other Commands. + (line 113) +* Replace pattern space with copy of hold space: Other Commands. + (line 121) +* Replacing all text matching regexp in a line: The "s" Command. + (line 69) +* Replacing only Nth match of regexp in a line: The "s" Command. + (line 73) +* Replacing selected lines with other text: Other Commands. (line 52) +* Requiring GNU sed: Extended Commands. (line 77) +* Script structure: sed Programs. (line 6) +* Script, from a file: Invoking sed. (line 46) +* Script, from command line: Invoking sed. (line 41) +* sed program structure: sed Programs. (line 6) +* Selecting lines to process: Addresses. (line 6) +* Selecting non-matching lines: Addresses. (line 103) +* Several lines, selecting: Addresses. (line 67) +* Slash character, in regular expressions: Addresses. (line 41) +* Spaces, pattern and hold: Execution Cycle. (line 6) +* Special addressing forms: Addresses. (line 80) +* Standard input, processing as input: Invoking sed. (line 150) +* Stream editor: Introduction. (line 6) +* Subprocesses <1>: Extended Commands. 
(line 12) +* Subprocesses: The "s" Command. (line 103) +* Substitution of text, options: The "s" Command. (line 65) +* Text, appending: Other Commands. (line 27) +* Text, deleting: Common Commands. (line 36) +* Text, insertion: Other Commands. (line 46) +* Text, printing: Common Commands. (line 39) +* Text, printing after substitution: The "s" Command. (line 83) +* Text, writing to a file after substitution: The "s" Command. + (line 96) +* Transliteration: Other Commands. (line 14) +* Unbuffered I/O, choosing: Invoking sed. (line 131) +* Usage summary, printing: Invoking sed. (line 28) +* Version, printing: Invoking sed. (line 24) +* Working on separate files: Invoking sed. (line 121) +* Write first line to a file: Extended Commands. (line 88) +* Write to a file: Other Commands. (line 88) +* Zero, as range start address: Addresses. (line 80) + + +File: sed.info, Node: Command and Option Index, Prev: Concept Index, Up: Top + +Command and Option Index +************************ + +This is an alphabetical list of all `sed' commands and command-line +options. + + +* Menu: + +* # (comments): Common Commands. (line 12) +* --binary: Invoking sed. (line 93) +* --expression: Invoking sed. (line 41) +* --file: Invoking sed. (line 46) +* --follow-symlinks: Invoking sed. (line 104) +* --help: Invoking sed. (line 28) +* --in-place: Invoking sed. (line 51) +* --line-length: Invoking sed. (line 76) +* --null-data: Invoking sed. (line 139) +* --posix: Invoking sed. (line 81) +* --quiet: Invoking sed. (line 34) +* --regexp-extended: Invoking sed. (line 113) +* --separate: Invoking sed. (line 121) +* --silent: Invoking sed. (line 34) +* --unbuffered: Invoking sed. (line 131) +* --version: Invoking sed. (line 24) +* --zero-terminated: Invoking sed. (line 139) +* -b: Invoking sed. (line 93) +* -e: Invoking sed. (line 41) +* -f: Invoking sed. (line 46) +* -i: Invoking sed. (line 51) +* -l: Invoking sed. (line 76) +* -n: Invoking sed. 
(line 34) +* -n, forcing from within a script: Common Commands. (line 20) +* -r: Invoking sed. (line 113) +* -s: Invoking sed. (line 121) +* -u: Invoking sed. (line 131) +* -z: Invoking sed. (line 139) +* : (label) command: Programming Commands. + (line 14) +* = (print line number) command: Other Commands. (line 62) +* a (append text lines) command: Other Commands. (line 27) +* b (branch) command: Programming Commands. + (line 18) +* c (change to text lines) command: Other Commands. (line 52) +* D (delete first line) command: Other Commands. (line 99) +* d (delete) command: Common Commands. (line 36) +* e (evaluate) command: Extended Commands. (line 12) +* F (File name) command: Extended Commands. (line 30) +* G (appending Get) command: Other Commands. (line 125) +* g (get) command: Other Commands. (line 121) +* H (append Hold) command: Other Commands. (line 117) +* h (hold) command: Other Commands. (line 113) +* i (insert text lines) command: Other Commands. (line 46) +* L (fLow paragraphs) command: Extended Commands. (line 34) +* l (list unambiguously) command: Other Commands. (line 65) +* N (append Next line) command: Other Commands. (line 105) +* n (next-line) command: Common Commands. (line 44) +* P (print first line) command: Other Commands. (line 110) +* p (print) command: Common Commands. (line 39) +* q (quit) command: Common Commands. (line 30) +* Q (silent Quit) command: Extended Commands. (line 44) +* r (read file) command: Other Commands. (line 78) +* R (read line) command: Extended Commands. (line 61) +* s command, option flags: The "s" Command. (line 65) +* T (test and branch if failed) command: Extended Commands. (line 71) +* t (test and branch if successful) command: Programming Commands. + (line 22) +* v (version) command: Extended Commands. (line 77) +* w (write file) command: Other Commands. (line 88) +* W (write first line) command: Extended Commands. (line 88) +* x (eXchange) command: Other Commands. 
(line 129) +* y (transliterate) command: Other Commands. (line 14) +* z (Zap) command: Extended Commands. (line 93) +* {} command grouping: Common Commands. (line 50) + + + +Tag Table: +Node: Top944 +Node: Introduction3867 +Node: Invoking sed4421 +Ref: Invoking sed-Footnote-110793 +Ref: Invoking sed-Footnote-210985 +Node: sed Programs11084 +Node: Execution Cycle12617 +Ref: Execution Cycle-Footnote-113794 +Node: Addresses14095 +Node: Regular Expressions18996 +Node: Common Commands26905 +Node: The "s" Command28908 +Ref: The "s" Command-Footnote-134229 +Node: Other Commands34301 +Ref: Other Commands-Footnote-139501 +Node: Programming Commands39573 +Node: Extended Commands40487 +Node: Escapes44752 +Ref: Escapes-Footnote-147763 +Node: Examples47954 +Node: Centering lines49050 +Node: Increment a number49942 +Ref: Increment a number-Footnote-151419 +Node: Rename files to lower case51539 +Node: Print bash environment54312 +Node: Reverse chars of lines55067 +Ref: Reverse chars of lines-Footnote-156068 +Node: tac56285 +Node: cat -n57052 +Node: cat -b58874 +Node: wc -c59621 +Ref: wc -c-Footnote-161529 +Node: wc -w61598 +Node: wc -l63062 +Node: head63306 +Node: tail63637 +Node: uniq65318 +Node: uniq -d66106 +Node: uniq -u66817 +Node: cat -s67528 +Node: Limitations69379 +Node: Other Resources70220 +Node: Reporting Bugs71065 +Ref: Reporting Bugs-Footnote-178131 +Node: Extended regexps78202 +Node: Concept Index79517 +Node: Command and Option Index94612 + +End Tag Table diff --git a/doc/sed.texi b/doc/sed.texi new file mode 100644 index 0000000..6efc48c --- /dev/null +++ b/doc/sed.texi @@ -0,0 +1,4356 @@ +\input texinfo @c -*-texinfo-*- +@c Do not edit this file!! It is automatically generated from sed-in.texi. +@c +@c -- Stuff that needs adding: ---------------------------------------------- +@c (nothing!) +@c -------------------------------------------------------------------------- +@c Check for consistency: regexps in @code, text that they match in @samp. 
+@c +@c Tips: +@c @command for command +@c @samp for command fragments: @samp{cat -s} +@c @code for sed commands and flags +@c Use ``quote'' not `quote' or "quote". +@c +@c %**start of header +@setfilename sed.info +@settitle sed, a stream editor +@c %**end of header + +@c @smallbook + +@include version.texi + +@c Combine indices. +@syncodeindex ky cp +@syncodeindex pg cp +@syncodeindex tp cp + +@defcodeindex op +@syncodeindex op fn + +@include config.texi + +@copying +This file documents version @value{VERSION} of +@value{SSED}, a stream editor. + +Copyright @copyright{} 1998, 1999, 2001, 2002, 2003, 2004 Free +Software Foundation, Inc. + +This document is released under the terms of the @acronym{GNU} Free +Documentation License as published by the Free Software Foundation; +either version 1.1, or (at your option) any later version. + +You should have received a copy of the @acronym{GNU} Free Documentation +License along with @value{SSED}; see the file @file{COPYING.DOC}. +If not, write to the Free Software Foundation, 59 Temple Place - Suite +330, Boston, MA 02110-1301, USA. + +There are no Cover Texts and no Invariant Sections; this text, along +with its equivalent in the printed manual, constitutes the Title Page. +@end copying + +@setchapternewpage off + +@titlepage +@title @command{sed}, a stream editor +@subtitle version @value{VERSION}, @value{UPDATED} +@author by Ken Pizzini, Paolo Bonzini + +@page +@vskip 0pt plus 1filll +Copyright @copyright{} 1998, 1999 Free Software Foundation, Inc. 
+ +@insertcopying + +Published by the Free Software Foundation, @* +51 Franklin Street, Fifth Floor @* +Boston, MA 02110-1301, USA +@end titlepage + + +@node Top +@top + +@ifnottex +@insertcopying +@end ifnottex + +@menu +* Introduction:: Introduction +* Invoking sed:: Invocation +* sed Programs:: @command{sed} programs +* Examples:: Some sample scripts +* Limitations:: Limitations and (non-)limitations of @value{SSED} +* Other Resources:: Other resources for learning about @command{sed} +* Reporting Bugs:: Reporting bugs + +* Extended regexps:: @command{egrep}-style regular expressions +@ifset PERL +* Perl regexps:: Perl-style regular expressions +@end ifset + +* Concept Index:: A menu with all the topics in this manual. +* Command and Option Index:: A menu with all @command{sed} commands and + command-line options. + +@detailmenu +--- The detailed node listing --- + +sed Programs: +* Execution Cycle:: How @command{sed} works +* Addresses:: Selecting lines with @command{sed} +* Regular Expressions:: Overview of regular expression syntax +* Common Commands:: Often used commands +* The "s" Command:: @command{sed}'s Swiss Army Knife +* Other Commands:: Less frequently used commands +* Programming Commands:: Commands for @command{sed} gurus +* Extended Commands:: Commands specific of @value{SSED} +* Escapes:: Specifying special characters + +Examples: +* Centering lines:: +* Increment a number:: +* Rename files to lower case:: +* Print bash environment:: +* Reverse chars of lines:: +* tac:: Reverse lines of files +* cat -n:: Numbering lines +* cat -b:: Numbering non-blank lines +* wc -c:: Counting chars +* wc -w:: Counting words +* wc -l:: Counting lines +* head:: Printing the first lines +* tail:: Printing the last lines +* uniq:: Make duplicate lines unique +* uniq -d:: Print duplicated lines of input +* uniq -u:: Remove all duplicated lines +* cat -s:: Squeezing blank lines + +@ifset PERL +Perl regexps:: Perl-style regular expressions +* Backslash:: Introduces 
special sequences +* Circumflex/dollar sign/period:: Behave specially with regard to new lines +* Square brackets:: Are a bit different in strange cases +* Options setting:: Toggle modifiers in the middle of a regexp +* Non-capturing subpatterns:: Are not counted when backreferencing +* Repetition:: Allows for non-greedy matching +* Backreferences:: Allows for more than 10 back references +* Assertions:: Allows for complex look ahead matches +* Non-backtracking subpatterns:: Often gives more performance +* Conditional subpatterns:: Allows if/then/else branches +* Recursive patterns:: For example to match parentheses +* Comments:: Because things can get complex... +@end ifset + +@end detailmenu +@end menu + + +@node Introduction +@chapter Introduction + +@cindex Stream editor +@command{sed} is a stream editor. +A stream editor is used to perform basic text +transformations on an input stream +(a file or input from a pipeline). +While in some ways similar to an editor which +permits scripted edits (such as @command{ed}), +@command{sed} works by making only one pass over the +input(s), and is consequently more efficient. +But it is @command{sed}'s ability to filter text in a pipeline +which particularly distinguishes it from other types of +editors. + + +@node Invoking sed +@chapter Invocation + +Normally @command{sed} is invoked like this: + +@example +sed SCRIPT INPUTFILE... +@end example + +The full format for invoking @command{sed} is: + +@example +sed OPTIONS... [SCRIPT] [INPUTFILE...] +@end example + +If you do not specify @var{INPUTFILE}, or if @var{INPUTFILE} is @file{-}, +@command{sed} filters the contents of the standard input. The @var{script} +is actually the first non-option parameter, which @command{sed} specially +considers a script and not an input file if (and only if) none of the +other @var{options} specifies a script to be executed, that is if neither +of the @option{-e} and @option{-f} options is specified. 
+ +@command{sed} may be invoked with the following command-line options: + +@table @code +@item --version +@opindex --version +@cindex Version, printing +Print out the version of @command{sed} that is being run and a copyright notice, +then exit. + +@item --help +@opindex --help +@cindex Usage summary, printing +Print a usage message briefly summarizing these command-line options +and the bug-reporting address, +then exit. + +@item -n +@itemx --quiet +@itemx --silent +@opindex -n +@opindex --quiet +@opindex --silent +@cindex Disabling autoprint, from command line +By default, @command{sed} prints out the pattern space +at the end of each cycle through the script (@pxref{Execution Cycle, , +How @code{sed} works}). +These options disable this automatic printing, +and @command{sed} only produces output when explicitly told to +via the @code{p} command. + +@item -e @var{script} +@itemx --expression=@var{script} +@opindex -e +@opindex --expression +@cindex Script, from command line +Add the commands in @var{script} to the set of commands to be +run while processing the input. + +@item -f @var{script-file} +@itemx --file=@var{script-file} +@opindex -f +@opindex --file +@cindex Script, from a file +Add the commands contained in the file @var{script-file} +to the set of commands to be run while processing the input. + +@item -i[@var{SUFFIX}] +@itemx --in-place[=@var{SUFFIX}] +@opindex -i +@opindex --in-place +@cindex In-place editing, activating +@cindex @value{SSEDEXT}, in-place editing +This option specifies that files are to be edited in-place. +@value{SSED} does this by creating a temporary file and +sending output to this file rather than to the standard +output.@footnote{This applies to commands such as @code{=}, +@code{a}, @code{c}, @code{i}, @code{l}, @code{p}. You can +still write to the standard output by using the @code{w} +@cindex @value{SSEDEXT}, @file{/dev/stdout} file +or @code{W} commands together with the @file{/dev/stdout} +special file}. 
+ +This option implies @option{-s}. + +When the end of the file is reached, the temporary file is +renamed to the output file's original name. The extension, +if supplied, is used to modify the name of the old file +before renaming the temporary file, thereby making a backup +copy@footnote{Note that @value{SSED} creates the backup +file whether or not any output is actually changed.}. + +@cindex In-place editing, Perl-style backup file names +This rule is followed: if the extension doesn't contain a @code{*}, +then it is appended to the end of the current filename as a +suffix; if the extension does contain one or more @code{*} +characters, then @emph{each} asterisk is replaced with the +current filename. This allows you to add a prefix to the +backup file, instead of (or in addition to) a suffix, or +even to place backup copies of the original files into another +directory (provided the directory already exists). + +If no extension is supplied, the original file is +overwritten without making a backup. + +@item -l @var{N} +@itemx --line-length=@var{N} +@opindex -l +@opindex --line-length +@cindex Line length, setting +Specify the default line-wrap length for the @code{l} command. +A length of 0 (zero) means to never wrap long lines. If +not specified, it is taken to be 70. + +@item --posix +@opindex --posix +@cindex @value{SSEDEXT}, disabling +@value{SSED} includes several extensions to @acronym{POSIX} +sed. In order to simplify writing portable scripts, this +option disables all the extensions that this manual documents, +including additional commands. +@cindex @code{POSIXLY_CORRECT} behavior, enabling +Most of the extensions accept @command{sed} programs that +are outside the syntax mandated by @acronym{POSIX}, but some +of them (such as the behavior of the @command{N} command +described in @ref{Reporting Bugs}) actually violate the +standard.
If you want to disable only the latter kind of +extension, you can set the @code{POSIXLY_CORRECT} variable +to a non-empty value. + +@item -b +@itemx --binary +@opindex -b +@opindex --binary +This option is available on every platform, but is only effective where the +operating system makes a distinction between text files and binary files. +When such a distinction is made---as is the case for MS-DOS, Windows, +Cygwin---text files are composed of lines separated by a carriage return +@emph{and} a line feed character, and @command{sed} does not see the +ending CR. When this option is specified, @command{sed} will open +input files in binary mode, thus not requesting this special processing +and considering lines to end at a line feed. + +@item --follow-symlinks +@opindex --follow-symlinks +This option is available only on platforms that support +symbolic links and has an effect only if option @option{-i} +is specified. In this case, if the file that is specified +on the command line is a symbolic link, @command{sed} will +follow the link and edit the ultimate destination of the +link. The default behavior is to break the symbolic link, +so that the link destination will not be modified. + +@item -r +@itemx --regexp-extended +@opindex -r +@opindex --regexp-extended +@cindex Extended regular expressions, choosing +@cindex @acronym{GNU} extensions, extended regular expressions +Use extended regular expressions rather than basic +regular expressions. Extended regexps are those that +@command{egrep} accepts; they can be clearer because they +usually have fewer backslashes, but are a @acronym{GNU} extension +and hence scripts that use them are not portable. +@xref{Extended regexps, , Extended regular expressions}.
+ +@ifset PERL +@item -R +@itemx --regexp-perl +@opindex -R +@opindex --regexp-perl +@cindex Perl-style regular expressions, choosing +@cindex @value{SSEDEXT}, Perl-style regular expressions +Use Perl-style regular expressions rather than basic +regular expressions. Perl-style regexps are extremely +powerful but are a @value{SSED} extension and hence scripts that +use them are not portable. @xref{Perl regexps, , +Perl-style regular expressions}. +@end ifset + +@item -s +@itemx --separate +@opindex -s +@opindex --separate +@cindex Working on separate files +By default, @command{sed} will consider the files specified on the +command line as a single continuous long stream. This @value{SSED} +extension allows the user to consider them as separate files: +range addresses (such as @samp{/abc/,/def/}) are not allowed +to span several files, line numbers are relative to the start +of each file, @code{$} refers to the last line of each file, +and files invoked from the @code{R} commands are rewound at the +start of each file. + +@item -u +@itemx --unbuffered +@opindex -u +@opindex --unbuffered +@cindex Unbuffered I/O, choosing +Buffer both input and output as minimally as practical. +(This is particularly useful if the input is coming from +the likes of @samp{tail -f}, and you wish to see the transformed +output as soon as possible.) + +@item -z +@itemx --null-data +@itemx --zero-terminated +@opindex -z +@opindex --null-data +@opindex --zero-terminated +Treat the input as a set of lines, each terminated by a zero byte +(the ASCII @samp{NUL} character) instead of a newline. This option can +be used with commands like @samp{sort -z} and @samp{find -print0} +to process arbitrary file names. +@end table + +If no @option{-e}, @option{-f}, @option{--expression}, or @option{--file} +options are given on the command-line, +then the first non-option argument on the command line is +taken to be the @var{script} to be executed.
+ +@cindex Files to be processed as input +If any command-line parameters remain after processing the above, +these parameters are interpreted as the names of input files to +be processed. +@cindex Standard input, processing as input +A file name of @samp{-} refers to the standard input stream. +The standard input will be processed if no file names are specified. + + +@node sed Programs +@chapter @command{sed} Programs + +@cindex @command{sed} program structure +@cindex Script structure +A @command{sed} program consists of one or more @command{sed} commands, +passed in by one or more of the +@option{-e}, @option{-f}, @option{--expression}, and @option{--file} +options, or the first non-option argument if zero of these +options are used. +This document will refer to ``the'' @command{sed} script; +this is understood to mean the in-order catenation +of all of the @var{script}s and @var{script-file}s passed in. + +Commands within a @var{script} or @var{script-file} can be +separated by semicolons (@code{;}) or newlines (ASCII 10). +Some commands, due to their syntax, cannot be followed by semicolons +working as command separators and thus should be terminated +with newlines or be placed at the end of a @var{script} or @var{script-file}. +Commands can also be preceded with optional non-significant +whitespace characters. + +Each @code{sed} command consists of an optional address or +address range, followed by a one-character command name +and any additional command-specific code. 
+ +@menu +* Execution Cycle:: How @command{sed} works +* Addresses:: Selecting lines with @command{sed} +* Regular Expressions:: Overview of regular expression syntax +* Common Commands:: Often used commands +* The "s" Command:: @command{sed}'s Swiss Army Knife +* Other Commands:: Less frequently used commands +* Programming Commands:: Commands for @command{sed} gurus +* Extended Commands:: Commands specific of @value{SSED} +* Escapes:: Specifying special characters +@end menu + + +@node Execution Cycle +@section How @command{sed} Works + +@cindex Buffer spaces, pattern and hold +@cindex Spaces, pattern and hold +@cindex Pattern space, definition +@cindex Hold space, definition +@command{sed} maintains two data buffers: the active @emph{pattern} space, +and the auxiliary @emph{hold} space. Both are initially empty. + +@command{sed} operates by performing the following cycle on each +line of input: first, @command{sed} reads one line from the input +stream, removes any trailing newline, and places it in the pattern space. +Then commands are executed; each command can have an address associated +to it: addresses are a kind of condition code, and a command is only +executed if the condition is verified before the command is to be +executed. + +When the end of the script is reached, unless the @option{-n} option +is in use, the contents of pattern space are printed out to the output +stream, adding back the trailing newline if it was removed.@footnote{Actually, +if @command{sed} prints a line without the terminating newline, it will +nevertheless print the missing newline as soon as more text is sent to +the same output stream, which gives the ``least expected surprise'' +even though it does not make commands like @samp{sed -n p} exactly +identical to @command{cat}.} Then the next cycle starts for the next +input line. + +Unless special commands (like @samp{D}) are used, the pattern space is +deleted between two cycles. 
The hold space, on the other hand, keeps +its data between cycles (see commands @samp{h}, @samp{H}, @samp{x}, +@samp{g}, @samp{G} to move data between both buffers). + + +@node Addresses +@section Selecting lines with @command{sed} +@cindex Addresses, in @command{sed} scripts +@cindex Line selection +@cindex Selecting lines to process + +Addresses in a @command{sed} script can be in any of the following forms: +@table @code +@item @var{number} +@cindex Address, numeric +@cindex Line, selecting by number +Specifying a line number will match only that line in the input. +(Note that @command{sed} counts lines continuously across all input files +unless @option{-i} or @option{-s} options are specified.) + +@item @var{first}~@var{step} +@cindex @acronym{GNU} extensions, @samp{@var{n}~@var{m}} addresses +This @acronym{GNU} extension matches every @var{step}th line +starting with line @var{first}. +In particular, lines will be selected when there exists +a non-negative @var{n} such that the current line-number equals +@var{first} + (@var{n} * @var{step}). +Thus, to select the odd-numbered lines, +one would use @code{1~2}; +to pick every third line starting with the second, @samp{2~3} would be used; +to pick every fifth line starting with the tenth, use @samp{10~5}; +and @samp{50~0} is just an obscure way of saying @code{50}. + +@item $ +@cindex Address, last line +@cindex Last line, selecting +@cindex Line, selecting last +This address matches the last line of the last file of input, or +the last line of each file when the @option{-i} or @option{-s} options +are specified. + +@item /@var{regexp}/ +@cindex Address, as a regular expression +@cindex Line, selecting by regular expression match +This will select any line which matches the regular expression @var{regexp}. +If @var{regexp} itself includes any @code{/} characters, +each must be escaped by a backslash (@code{\}). 
+ +@cindex empty regular expression +@cindex @value{SSEDEXT}, modifiers and the empty regular expression +The empty regular expression @samp{//} repeats the last regular +expression match (the same holds if the empty regular expression is +passed to the @code{s} command). Note that modifiers to regular expressions +are evaluated when the regular expression is compiled, thus it is invalid to +specify them together with the empty regular expression. + +@item \%@var{regexp}% +(The @code{%} may be replaced by any other single character.) + +@cindex Slash character, in regular expressions +This also matches the regular expression @var{regexp}, +but allows one to use a different delimiter than @code{/}. +This is particularly useful if the @var{regexp} itself contains +a lot of slashes, since it avoids the tedious escaping of every @code{/}. +If @var{regexp} itself includes any delimiter characters, +each must be escaped by a backslash (@code{\}). + +@item /@var{regexp}/I +@itemx \%@var{regexp}%I +@cindex @acronym{GNU} extensions, @code{I} modifier +@ifset PERL +@cindex Perl-style regular expressions, case-insensitive +@end ifset +The @code{I} modifier to regular-expression matching is a @acronym{GNU} +extension which causes the @var{regexp} to be matched in +a case-insensitive manner. + +@item /@var{regexp}/M +@itemx \%@var{regexp}%M +@cindex @value{SSEDEXT}, @code{M} modifier +@ifset PERL +@cindex Perl-style regular expressions, multiline +@end ifset +The @code{M} modifier to regular-expression matching is a @value{SSED} +extension which directs @value{SSED} to match the regular expression +in @cite{multi-line} mode. The modifier causes @code{^} and @code{$} to +match respectively (in addition to the normal behavior) the empty string +after a newline, and the empty string before a newline. 
There are +special character sequences +@ifset PERL +(@code{\A} and @code{\Z} in Perl mode, @code{\`} and @code{\'} +in basic or extended regular expression modes) +@end ifset +@ifclear PERL +(@code{\`} and @code{\'}) +@end ifclear +which always match the beginning or the end of the buffer. +In addition, +@ifset PERL +just like in Perl mode without the @code{S} modifier, +@end ifset +the period character does not match a new-line character in +multi-line mode. + +@ifset PERL +@item /@var{regexp}/S +@itemx \%@var{regexp}%S +@cindex @value{SSEDEXT}, @code{S} modifier +@cindex Perl-style regular expressions, single line +The @code{S} modifier to regular-expression matching is only valid +in Perl mode and specifies that the dot character (@code{.}) will +match the newline character too. @code{S} stands for @cite{single-line}. +@end ifset + +@ifset PERL +@item /@var{regexp}/X +@itemx \%@var{regexp}%X +@cindex @value{SSEDEXT}, @code{X} modifier +@cindex Perl-style regular expressions, extended +The @code{X} modifier to regular-expression matching is also +valid in Perl mode only. If it is used, whitespace in the +pattern (other than in a character class) and +characters between a @kbd{#} outside a character class and the +next newline character are ignored. An escaping backslash +can be used to include a whitespace or @kbd{#} character as part +of the pattern. +@end ifset +@end table + +If no addresses are given, then all lines are matched; +if one address is given, then only lines matching that +address are matched. + +@cindex Range of lines +@cindex Several lines, selecting +An address range can be specified by specifying two addresses +separated by a comma (@code{,}). An address range matches lines +starting from where the first address matches, and continues +until the second address matches (inclusively). 
+ +If the second address is a @var{regexp}, then checking for the +ending match will start with the line @emph{following} the +line which matched the first address: a range will always +span at least two lines (except of course if the input stream +ends). + +If the second address is a @var{number} less than (or equal to) +the line matching the first address, then only the one line is +matched. + +@cindex Special addressing forms +@cindex Range with start address of zero +@cindex Zero, as range start address +@cindex @var{addr1},+N +@cindex @var{addr1},~N +@cindex @acronym{GNU} extensions, special two-address forms +@cindex @acronym{GNU} extensions, @code{0} address +@cindex @acronym{GNU} extensions, 0,@var{addr2} addressing +@cindex @acronym{GNU} extensions, @var{addr1},+@var{N} addressing +@cindex @acronym{GNU} extensions, @var{addr1},~@var{N} addressing +@value{SSED} also supports some special two-address forms; all these +are @acronym{GNU} extensions: +@table @code +@item 0,/@var{regexp}/ +A line number of @code{0} can be used in an address specification like +@code{0,/@var{regexp}/} so that @command{sed} will try to match +@var{regexp} in the first input line too. In other words, +@code{0,/@var{regexp}/} is similar to @code{1,/@var{regexp}/}, +except that if @var{addr2} matches the very first line of input the +@code{0,/@var{regexp}/} form will consider it to end the range, whereas +the @code{1,/@var{regexp}/} form will match the beginning of its range and +hence make the range span up to the @emph{second} occurrence of the +regular expression. + +Note that this is the only place where the @code{0} address makes +sense; there is no 0-th line and commands which are given the @code{0} +address in any other way will give an error. + +@item @var{addr1},+@var{N} +Matches @var{addr1} and the @var{N} lines following @var{addr1}. 
+ +@item @var{addr1},~@var{N} +Matches @var{addr1} and the lines following @var{addr1} +until the next line whose input line number is a multiple of @var{N}. +@end table + +@cindex Excluding lines +@cindex Selecting non-matching lines +Appending the @code{!} character to the end of an address +specification negates the sense of the match. +That is, if the @code{!} character follows an address range, +then only lines which do @emph{not} match the address range +will be selected. +This also works for singleton addresses, +and, perhaps perversely, for the null address. + + +@node Regular Expressions +@section Overview of Regular Expression Syntax + +To know how to use @command{sed}, people should understand regular +expressions (@dfn{regexp} for short). A regular expression +is a pattern that is matched against a +subject string from left to right. Most characters are +@dfn{ordinary}: they stand for +themselves in a pattern, and match the corresponding characters +in the subject. As a trivial example, the pattern + +@example +The quick brown fox +@end example + +@noindent +matches a portion of a subject string that is identical to +itself. The power of regular expressions comes from the +ability to include alternatives and repetitions in the pattern. +These are encoded in the pattern by the use of @dfn{special characters}, +which do not stand for themselves but instead +are interpreted in some special way. Here is a brief description +of regular expression syntax as used in @command{sed}. + +@table @code +@item @var{char} +A single ordinary character matches itself. + +@item * +@cindex @acronym{GNU} extensions, to basic regular expressions +Matches a sequence of zero or more instances of matches for the +preceding regular expression, which must be an ordinary character, a +special character preceded by @code{\}, a @code{.}, a grouped regexp +(see below), or a bracket expression. 
As a @acronym{GNU} extension, a +postfixed regular expression can also be followed by @code{*}; for +example, @code{a**} is equivalent to @code{a*}. @acronym{POSIX} +1003.1-2001 says that @code{*} stands for itself when it appears at +the start of a regular expression or subexpression, but many +non-@acronym{GNU} implementations do not support this and portable +scripts should instead use @code{\*} in these contexts. + +@item \+ +@cindex @acronym{GNU} extensions, to basic regular expressions +As @code{*}, but matches one or more. It is a @acronym{GNU} extension. + +@item \? +@cindex @acronym{GNU} extensions, to basic regular expressions +As @code{*}, but only matches zero or one. It is a @acronym{GNU} extension. + +@item \@{@var{i}\@} +As @code{*}, but matches exactly @var{i} sequences (@var{i} is a +decimal integer; for portability, keep it between 0 and 255 +inclusive). + +@item \@{@var{i},@var{j}\@} +Matches between @var{i} and @var{j}, inclusive, sequences. + +@item \@{@var{i},\@} +Matches more than or equal to @var{i} sequences. + +@item \(@var{regexp}\) +Groups the inner @var{regexp} as a whole, this is used to: + +@itemize @bullet +@item +@cindex @acronym{GNU} extensions, to basic regular expressions +Apply postfix operators, like @code{\(abcd\)*}: +this will search for zero or more whole sequences +of @samp{abcd}, while @code{abcd*} would search +for @samp{abc} followed by zero or more occurrences +of @samp{d}. Note that support for @code{\(abcd\)*} is +required by @acronym{POSIX} 1003.1-2001, but many non-@acronym{GNU} +implementations do not support it and hence it is not universally +portable. + +@item +Use back references (see below). +@end itemize + +@item . +Matches any character, including newline. + +@item ^ +Matches the null string at beginning of the pattern space, i.e. what +appears after the circumflex must appear at the beginning of the +pattern space. 
+ +In most scripts, pattern space is initialized to the content of each +line (@pxref{Execution Cycle, , How @code{sed} works}). So, it is a +useful simplification to think of @code{^#include} as matching only +lines where @samp{#include} is the first thing on line---if there are +spaces before, for example, the match fails. This simplification is +valid as long as the original content of pattern space is not modified, +for example with an @code{s} command. + +@code{^} acts as a special character only at the beginning of the +regular expression or subexpression (that is, after @code{\(} or +@code{\|}). Portable scripts should avoid @code{^} at the beginning of +a subexpression, though, as @acronym{POSIX} allows implementations that +treat @code{^} as an ordinary character in that context. + +@item $ +It is the same as @code{^}, but refers to end of pattern space. +@code{$} also acts as a special character only at the end +of the regular expression or subexpression (that is, before @code{\)} +or @code{\|}), and its use at the end of a subexpression is not +portable. + + +@item [@var{list}] +@itemx [^@var{list}] +Matches any single character in @var{list}: for example, +@code{[aeiou]} matches all vowels. A list may include +sequences like @code{@var{char1}-@var{char2}}, which +matches any character between (inclusive) @var{char1} +and @var{char2}. + +A leading @code{^} reverses the meaning of @var{list}, so that +it matches any single character @emph{not} in @var{list}. To include +@code{]} in the list, make it the first character (after +the @code{^} if needed), to include @code{-} in the list, +make it the first or last; to include @code{^} put +it after the first character. + +@cindex @code{POSIXLY_CORRECT} behavior, bracket expressions +The characters @code{$}, @code{*}, @code{.}, @code{[}, and @code{\} +are normally not special within @var{list}. For example, @code{[\*]} +matches either @samp{\} or @samp{*}, because the @code{\} is not +special here. 
However, strings like @code{[.ch.]}, @code{[=a=]}, and +@code{[:space:]} are special within @var{list} and represent collating +symbols, equivalence classes, and character classes, respectively, and +@code{[} is therefore special within @var{list} when it is followed by +@code{.}, @code{=}, or @code{:}. Also, when not in +@env{POSIXLY_CORRECT} mode, special escapes like @code{\n} and +@code{\t} are recognized within @var{list}. @xref{Escapes}. + +@item @var{regexp1}\|@var{regexp2} +@cindex @acronym{GNU} extensions, to basic regular expressions +Matches either @var{regexp1} or @var{regexp2}. Use +parentheses to use complex alternative regular expressions. +The matching process tries each alternative in turn, from +left to right, and the first one that succeeds is used. +It is a @acronym{GNU} extension. + +@item @var{regexp1}@var{regexp2} +Matches the concatenation of @var{regexp1} and @var{regexp2}. +Concatenation binds more tightly than @code{\|}, @code{^}, and +@code{$}, but less tightly than the other regular expression +operators. + +@item \@var{digit} +Matches the @var{digit}-th @code{\(@dots{}\)} parenthesized +subexpression in the regular expression. This is called a @dfn{back +reference}. Subexpressions are implicitly numbered by counting +occurrences of @code{\(} left-to-right. + +@item \n +Matches the newline character. + +@item \@var{char} +Matches @var{char}, where @var{char} is one of @code{$}, +@code{*}, @code{.}, @code{[}, @code{\}, or @code{^}. +Note that the only C-like +backslash sequences that you can portably assume to be +interpreted are @code{\n} and @code{\\}; in particular +@code{\t} is not portable, and matches a @samp{t} under most +implementations of @command{sed}, rather than a tab character. 
+ +@end table + +@cindex Greedy regular expression matching +Note that the regular expression matcher is greedy, i.e., matches +are attempted from left to right and, if two or more matches are +possible starting at the same character, it selects the longest. + +@noindent +Examples: +@table @samp +@item abcdef +Matches @samp{abcdef}. + +@item a*b +Matches zero or more @samp{a}s followed by a single +@samp{b}. For example, @samp{b} or @samp{aaaaab}. + +@item a\?b +Matches @samp{b} or @samp{ab}. + +@item a\+b\+ +Matches one or more @samp{a}s followed by one or more +@samp{b}s: @samp{ab} is the shortest possible match, but +other examples are @samp{aaaab} or @samp{abbbbb} or +@samp{aaaaaabbbbbbb}. + +@item .* +@itemx .\+ +These two both match all the characters in a string; +however, the first matches every string (including the empty +string), while the second matches only strings containing +at least one character. + +@item ^main.*(.*) +This matches a string starting with @samp{main}, +followed by an opening and closing +parenthesis. The @samp{n}, @samp{(} and @samp{)} need not +be adjacent. + +@item ^# +This matches a string beginning with @samp{#}. + +@item \\$ +This matches a string ending with a single backslash. The +regexp contains two backslashes for escaping. + +@item \$ +Instead, this matches a string consisting of a single dollar sign, +because it is escaped. + +@item [a-zA-Z0-9] +In the C locale, this matches any @acronym{ASCII} letters or digits. + +@item [^ @kbd{tab}]\+ +(Here @kbd{tab} stands for a single tab character.) +This matches a string of one or more +characters, none of which is a space or a tab. +Usually this means a word. + +@item ^\(.*\)\n\1$ +This matches a string consisting of two equal substrings separated by +a newline. + +@item .\@{9\@}A$ +This matches nine characters followed by an @samp{A}. + +@item ^.\@{15\@}A +This matches the start of a string that contains 16 characters, +the last of which is an @samp{A}. 
+ +@end table + + + +@node Common Commands +@section Often-Used Commands + +If you use @command{sed} at all, you will quite likely want to know +these commands. + +@table @code +@item # +[No addresses allowed.] + +@findex # (comments) +@cindex Comments, in scripts +The @code{#} character begins a comment; +the comment continues until the next newline. + +@cindex Portability, comments +If you are concerned about portability, be aware that +some implementations of @command{sed} (which are not @sc{posix} +conformant) may only support a single one-line comment, +and then only when the very first character of the script is a @code{#}. + +@findex -n, forcing from within a script +@cindex Caveat --- #n on first line +Warning: if the first two characters of the @command{sed} script +are @code{#n}, then the @option{-n} (no-autoprint) option is forced. +If you want to put a comment in the first line of your script +and that comment begins with the letter @samp{n} +and you do not want this behavior, +then be sure to either use a capital @samp{N}, +or place at least one space before the @samp{n}. + +@item q [@var{exit-code}] +This command only accepts a single address. + +@findex q (quit) command +@cindex @value{SSEDEXT}, returning an exit code +@cindex Quitting +Exit @command{sed} without processing any more commands or input. +Note that the current pattern space is printed if auto-print is +not disabled with the @option{-n} option. The ability to return +an exit code from the @command{sed} script is a @value{SSED} extension. + +@item d +@findex d (delete) command +@cindex Text, deleting +Delete the pattern space; +immediately start next cycle. + +@item p +@findex p (print) command +@cindex Text, printing +Print out the pattern space (to the standard output). +This command is usually only used in conjunction with the @option{-n} +command-line option. 
+ +@item n +@findex n (next-line) command +@cindex Next input line, replace pattern space with +@cindex Read next input line +If auto-print is not disabled, print the pattern space, +then, regardless, replace the pattern space with the next line of input. +If there is no more input then @command{sed} exits without processing +any more commands. + +@item @{ @var{commands} @} +@findex @{@} command grouping +@cindex Grouping commands +@cindex Command groups +A group of commands may be enclosed between +@code{@{} and @code{@}} characters. +This is particularly useful when you want a group of commands +to be triggered by a single address (or address-range) match. + +@end table + +@node The "s" Command +@section The @code{s} Command + +The syntax of the @code{s} (as in substitute) command is +@samp{s/@var{regexp}/@var{replacement}/@var{flags}}. The @code{/} +characters may be uniformly replaced by any other single +character within any given @code{s} command. The @code{/} +character (or whatever other character is used in its stead) +can appear in the @var{regexp} or @var{replacement} +only if it is preceded by a @code{\} character. + +The @code{s} command is probably the most important in @command{sed} +and has a lot of different options. Its basic concept is simple: +the @code{s} command attempts to match the pattern +space against the supplied @var{regexp}; if the match is +successful, then that portion of the pattern +space which was matched is replaced with @var{replacement}. + +@cindex Backreferences, in regular expressions +@cindex Parenthesized substrings +The @var{replacement} can contain @code{\@var{n}} (@var{n} being +a number from 1 to 9, inclusive) references, which refer to +the portion of the match which is contained between the @var{n}th +@code{\(} and its matching @code{\)}. +Also, the @var{replacement} can contain unescaped @code{&} +characters which reference the whole matched portion +of the pattern space. 
+@cindex @value{SSEDEXT}, case modifiers in @code{s} commands +Finally, as a @value{SSED} extension, you can include a +special sequence made of a backslash and one of the letters +@code{L}, @code{l}, @code{U}, @code{u}, or @code{E}. +The meaning is as follows: + +@table @code +@item \L +Turn the replacement +to lowercase until a @code{\U} or @code{\E} is found, + +@item \l +Turn the +next character to lowercase, + +@item \U +Turn the replacement to uppercase +until a @code{\L} or @code{\E} is found, + +@item \u +Turn the next character +to uppercase, + +@item \E +Stop case conversion started by @code{\L} or @code{\U}. +@end table + +When the @code{g} flag is being used, case conversion does not +propagate from one occurrence of the regular expression to +another. For example, when the following command is executed +with @samp{a-b-} in pattern space: +@example +s/\(b\?\)-/x\u\1/g +@end example + +@noindent +the output is @samp{axxB}. When replacing the first @samp{-}, +the @samp{\u} sequence only affects the empty replacement of +@samp{\1}. It does not affect the @code{x} character that is +added to pattern space when replacing @code{b-} with @code{xB}. + +On the other hand, @code{\l} and @code{\u} do affect the remainder +of the replacement text if they are followed by an empty substitution. +With @samp{a-b-} in pattern space, the following command: +@example +s/\(b\?\)-/\u\1x/g +@end example + +@noindent +will replace @samp{-} with @samp{X} (uppercase) and @samp{b-} with +@samp{Bx}. If this behavior is undesirable, you can prevent it by +adding a @samp{\E} sequence---after @samp{\1} in this case. + +To include a literal @code{\}, @code{&}, or newline in the final +replacement, be sure to precede the desired @code{\}, @code{&}, +or newline in the @var{replacement} with a @code{\}. 
+ +@findex s command, option flags +@cindex Substitution of text, options +The @code{s} command can be followed by zero or more of the +following @var{flags}: + +@table @code +@item g +@cindex Global substitution +@cindex Replacing all text matching regexp in a line +Apply the replacement to @emph{all} matches to the @var{regexp}, +not just the first. + +@item @var{number} +@cindex Replacing only @var{n}th match of regexp in a line +Only replace the @var{number}th match of the @var{regexp}. + +@cindex @acronym{GNU} extensions, @code{g} and @var{number} modifier interaction in @code{s} command +@cindex Mixing @code{g} and @var{number} modifiers in the @code{s} command +Note: the @sc{posix} standard does not specify what should happen +when you mix the @code{g} and @var{number} modifiers, +and currently there is no widely agreed upon meaning +across @command{sed} implementations. +For @value{SSED}, the interaction is defined to be: +ignore matches before the @var{number}th, +and then match and replace all matches from +the @var{number}th on. + +@item p +@cindex Text, printing after substitution +If the substitution was made, then print the new pattern space. + +Note: when both the @code{p} and @code{e} options are specified, +the relative ordering of the two produces very different results. +In general, @code{ep} (evaluate then print) is what you want, +but operating the other way round can be useful for debugging. +For this reason, the current version of @value{SSED} interprets +specially the presence of @code{p} options both before and after +@code{e}, printing the pattern space before and after evaluation, +while in general flags for the @code{s} command show their +effect just once. This behavior, although documented, might +change in future versions. 
+ +@item w @var{file-name} +@cindex Text, writing to a file after substitution +@cindex @value{SSEDEXT}, @file{/dev/stdout} file +@cindex @value{SSEDEXT}, @file{/dev/stderr} file +If the substitution was made, then write out the result to the named file. +As a @value{SSED} extension, two special values of @var{file-name} are +supported: @file{/dev/stderr}, which writes the result to the standard +error, and @file{/dev/stdout}, which writes to the standard +output.@footnote{This is equivalent to @code{p} unless the @option{-i} +option is being used.} + +@item e +@cindex Evaluate Bourne-shell commands, after substitution +@cindex Subprocesses +@cindex @value{SSEDEXT}, evaluating Bourne-shell commands +@cindex @value{SSEDEXT}, subprocesses +This command allows one to pipe input from a shell command +into pattern space. If a substitution was made, the command +that is found in pattern space is executed and pattern space +is replaced with its output. A trailing newline is suppressed; +results are undefined if the command to be executed contains +a @sc{nul} character. This is a @value{SSED} extension. + +@item I +@itemx i +@cindex @acronym{GNU} extensions, @code{I} modifier +@cindex Case-insensitive matching +@ifset PERL +@cindex Perl-style regular expressions, case-insensitive +@end ifset +The @code{I} modifier to regular-expression matching is a @acronym{GNU} +extension which makes @command{sed} match @var{regexp} in a +case-insensitive manner. + +@item M +@itemx m +@cindex @value{SSEDEXT}, @code{M} modifier +@ifset PERL +@cindex Perl-style regular expressions, multiline +@end ifset +The @code{M} modifier to regular-expression matching is a @value{SSED} +extension which directs @value{SSED} to match the regular expression +in @cite{multi-line} mode. The modifier causes @code{^} and @code{$} to +match respectively (in addition to the normal behavior) the empty string +after a newline, and the empty string before a newline. 
There are +special character sequences +@ifset PERL +(@code{\A} and @code{\Z} in Perl mode, @code{\`} and @code{\'} +in basic or extended regular expression modes) +@end ifset +@ifclear PERL +(@code{\`} and @code{\'}) +@end ifclear +which always match the beginning or the end of the buffer. +In addition, +@ifset PERL +just like in Perl mode without the @code{S} modifier, +@end ifset +the period character does not match a new-line character in +multi-line mode. + +@ifset PERL +@item S +@itemx s +@cindex @value{SSEDEXT}, @code{S} modifier +@cindex Perl-style regular expressions, single line +The @code{S} modifier to regular-expression matching is only valid +in Perl mode and specifies that the dot character (@code{.}) will +match the newline character too. @code{S} stands for @cite{single-line}. +@end ifset + +@ifset PERL +@item X +@itemx x +@cindex @value{SSEDEXT}, @code{X} modifier +@cindex Perl-style regular expressions, extended +The @code{X} modifier to regular-expression matching is also +valid in Perl mode only. If it is used, whitespace in the +pattern (other than in a character class) and +characters between a @kbd{#} outside a character class and the +next newline character are ignored. An escaping backslash +can be used to include a whitespace or @kbd{#} character as part +of the pattern. +@end ifset +@end table + + +@node Other Commands +@section Less Frequently-Used Commands + +Though perhaps less frequently used than those in the previous +section, some very small yet useful @command{sed} scripts can be built with +these commands. + +@table @code +@item y/@var{source-chars}/@var{dest-chars}/ +(The @code{/} characters may be uniformly replaced by +any other single character within any given @code{y} command.) + +@findex y (transliterate) command +@cindex Transliteration +Transliterate any characters in the pattern space which match +any of the @var{source-chars} with the corresponding character +in @var{dest-chars}. 
+ +Instances of the @code{/} (or whatever other character is used in its stead), +@code{\}, or newlines can appear in the @var{source-chars} or @var{dest-chars} +lists, provided that each instance is escaped by a @code{\}. +The @var{source-chars} and @var{dest-chars} lists @emph{must} +contain the same number of characters (after de-escaping). + +@item a\ +@itemx @var{text} +@cindex @value{SSEDEXT}, two addresses supported by most commands +As a @acronym{GNU} extension, this command accepts two addresses. + +@findex a (append text lines) command +@cindex Appending text after a line +@cindex Text, appending +Queue the lines of text which follow this command +(each but the last ending with a @code{\}, +which are removed from the output) +to be output at the end of the current cycle, +or when the next input line is read. + +Escape sequences in @var{text} are processed, so you should +use @code{\\} in @var{text} to print a single backslash. + +As a @acronym{GNU} extension, if between the @code{a} and the newline there is +other than a whitespace-@code{\} sequence, then the text of this line, +starting at the first non-whitespace character after the @code{a}, +is taken as the first line of the @var{text} block. +(This enables a simplification in scripting a one-line add.) +This extension also works with the @code{i} and @code{c} commands. + +@item i\ +@itemx @var{text} +@cindex @value{SSEDEXT}, two addresses supported by most commands +As a @acronym{GNU} extension, this command accepts two addresses. + +@findex i (insert text lines) command +@cindex Inserting text before a line +@cindex Text, insertion +Immediately output the lines of text which follow this command +(each but the last ending with a @code{\}, +which are removed from the output). 
+ +@item c\ +@itemx @var{text} +@findex c (change to text lines) command +@cindex Replacing selected lines with other text +Delete the lines matching the address or address-range, +and output the lines of text which follow this command +(each but the last ending with a @code{\}, +which are removed from the output) +in place of the last line +(or in place of each line, if no addresses were specified). +A new cycle is started after this command is done, +since the pattern space will have been deleted. + +@item = +@cindex @value{SSEDEXT}, two addresses supported by most commands +As a @acronym{GNU} extension, this command accepts two addresses. + +@findex = (print line number) command +@cindex Printing line number +@cindex Line number, printing +Print out the current input line number (with a trailing newline). + +@item l @var{n} +@findex l (list unambiguously) command +@cindex List pattern space +@cindex Printing text unambiguously +@cindex Line length, setting +@cindex @value{SSEDEXT}, setting line length +Print the pattern space in an unambiguous form: +non-printable characters (and the @code{\} character) +are printed in C-style escaped form; long lines are split, +with a trailing @code{\} character to indicate the split; +the end of each line is marked with a @code{$}. + +@var{n} specifies the desired line-wrap length; +a length of 0 (zero) means to never wrap long lines. If omitted, +the default as specified on the command line is used. The @var{n} +parameter is a @value{SSED} extension. + +@item r @var{filename} +@cindex @value{SSEDEXT}, two addresses supported by most commands +As a @acronym{GNU} extension, this command accepts two addresses. + +@findex r (read file) command +@cindex Read text from a file +@cindex @value{SSEDEXT}, @file{/dev/stdin} file +Queue the contents of @var{filename} to be read and +inserted into the output stream at the end of the current cycle, +or when the next input line is read. 
+Note that if @var{filename} cannot be read, it is treated as +if it were an empty file, without any error indication. + +As a @value{SSED} extension, the special value @file{/dev/stdin} +is supported for the file name, which reads the contents of the +standard input. + +@item w @var{filename} +@findex w (write file) command +@cindex Write to a file +@cindex @value{SSEDEXT}, @file{/dev/stdout} file +@cindex @value{SSEDEXT}, @file{/dev/stderr} file +Write the pattern space to @var{filename}. +As a @value{SSED} extension, two special values of @var{filename} are +supported: @file{/dev/stderr}, which writes the result to the standard +error, and @file{/dev/stdout}, which writes to the standard +output.@footnote{This is equivalent to @code{p} unless the @option{-i} +option is being used.} + +The file will be created (or truncated) before the first input line is +read; all @code{w} commands (including instances of the @code{w} flag +on successful @code{s} commands) which refer to the same @var{filename} +are output without closing and reopening the file. + +@item D +@findex D (delete first line) command +@cindex Delete first line from pattern space +If pattern space contains no newline, start a normal new cycle as if +the @code{d} command was issued. Otherwise, delete text in the pattern +space up to the first newline, and restart cycle with the resultant +pattern space, without reading a new line of input. + +@item N +@findex N (append Next line) command +@cindex Next input line, append to pattern space +@cindex Append next input line to pattern space +Add a newline to the pattern space, +then append the next line of input to the pattern space. +If there is no more input then @command{sed} exits without processing +any more commands. + +@item P +@findex P (print first line) command +@cindex Print first line from pattern space +Print out the portion of the pattern space up to the first newline. 
+ +@item h +@findex h (hold) command +@cindex Copy pattern space into hold space +@cindex Replace hold space with copy of pattern space +@cindex Hold space, copying pattern space into +Replace the contents of the hold space with the contents of the pattern space. + +@item H +@findex H (append Hold) command +@cindex Append pattern space to hold space +@cindex Hold space, appending from pattern space +Append a newline to the contents of the hold space, +and then append the contents of the pattern space to that of the hold space. + +@item g +@findex g (get) command +@cindex Copy hold space into pattern space +@cindex Replace pattern space with copy of hold space +@cindex Hold space, copy into pattern space +Replace the contents of the pattern space with the contents of the hold space. + +@item G +@findex G (appending Get) command +@cindex Append hold space to pattern space +@cindex Hold space, appending to pattern space +Append a newline to the contents of the pattern space, +and then append the contents of the hold space to that of the pattern space. + +@item x +@findex x (eXchange) command +@cindex Exchange hold space with pattern space +@cindex Hold space, exchange with pattern space +Exchange the contents of the hold and pattern spaces. + +@end table + + +@node Programming Commands +@section Commands for @command{sed} gurus + +In most cases, use of these commands indicates that you are +probably better off programming in something like @command{awk} +or Perl. But occasionally one is committed to sticking +with @command{sed}, and these commands can enable one to write +quite convoluted scripts. + +@cindex Flow of control in scripts +@table @code +@item : @var{label} +[No addresses allowed.] + +@findex : (label) command +@cindex Labels, in scripts +Specify the location of @var{label} for branch commands. +In all other respects, a no-op. 
+ +@item b @var{label} +@findex b (branch) command +@cindex Branch to a label, unconditionally +@cindex Goto, in scripts +Unconditionally branch to @var{label}. +The @var{label} may be omitted, in which case the next cycle is started. + +@item t @var{label} +@findex t (test and branch if successful) command +@cindex Branch to a label, if @code{s///} succeeded +@cindex Conditional branch +Branch to @var{label} only if there has been a successful @code{s}ubstitution +since the last input line was read or conditional branch was taken. +The @var{label} may be omitted, in which case the next cycle is started. + +@end table + +@node Extended Commands +@section Commands Specific to @value{SSED} + +These commands are specific to @value{SSED}, so you +must use them with care and only when you are sure that +hindering portability is not evil. They allow you to check +for @value{SSED} extensions or to do tasks that are required +quite often, yet are unsupported by standard @command{sed}s. + +@table @code +@item e [@var{command}] +@findex e (evaluate) command +@cindex Evaluate Bourne-shell commands +@cindex Subprocesses +@cindex @value{SSEDEXT}, evaluating Bourne-shell commands +@cindex @value{SSEDEXT}, subprocesses +This command allows one to pipe input from a shell command +into pattern space. Without parameters, the @code{e} command +executes the command that is found in pattern space and +replaces the pattern space with the output; a trailing newline +is suppressed. + +If a parameter is specified, instead, the @code{e} command +interprets it as a command and sends its output to the output stream. +The command can run across multiple lines, all but the last ending with +a back-slash. + +In both cases, the results are undefined if the command to be +executed contains a @sc{nul} character. + +Note that, unlike the @code{r} command, the output of the command will +be printed immediately; the @code{r} command instead delays the output +to the end of the current cycle. 
+ +@item F +@findex F (File name) command +@cindex Printing file name +@cindex File name, printing +Print out the file name of the current input file (with a trailing +newline). + +@item L @var{n} +@findex L (fLow paragraphs) command +@cindex Reformat pattern space +@cindex Reformatting paragraphs +@cindex @value{SSEDEXT}, reformatting paragraphs +@cindex @value{SSEDEXT}, @code{L} command +This @value{SSED} extension fills and joins lines in pattern space +to produce output lines of (at most) @var{n} characters, like +@code{fmt} does; if @var{n} is omitted, the default as specified +on the command line is used. This command is considered a failed +experiment and unless there is enough request (which seems unlikely) +will be removed in future versions. + +@ignore +Blank lines, spaces between words, and indentation are +preserved in the output; successive input lines with different +indentation are not joined; tabs are expanded to 8 columns. + +If the pattern space contains multiple lines, they are joined, but +since the pattern space usually contains a single line, the behavior +of a simple @code{L;d} script is the same as @samp{fmt -s} (i.e., +it does not join short lines to form longer ones). + +@var{n} specifies the desired line-wrap length; if omitted, +the default as specified on the command line is used. +@end ignore + +@item Q [@var{exit-code}] +This command only accepts a single address. + +@findex Q (silent Quit) command +@cindex @value{SSEDEXT}, quitting silently +@cindex @value{SSEDEXT}, returning an exit code +@cindex Quitting +This command is the same as @code{q}, but will not print the +contents of pattern space. Like @code{q}, it provides the +ability to return an exit code to the caller. 
+ +This command can be useful because the only alternative ways +to accomplish this apparently trivial function are to use +the @option{-n} option (which can unnecessarily complicate +your script) or resorting to the following snippet, which +wastes time by reading the whole file without any visible effect: + +@example +:eat +$d @i{@r{Quit silently on the last line}} +N @i{@r{Read another line, silently}} +g @i{@r{Overwrite pattern space each time to save memory}} +b eat +@end example + +@item R @var{filename} +@findex R (read line) command +@cindex Read text from a file +@cindex @value{SSEDEXT}, reading a file a line at a time +@cindex @value{SSEDEXT}, @code{R} command +@cindex @value{SSEDEXT}, @file{/dev/stdin} file +Queue a line of @var{filename} to be read and +inserted into the output stream at the end of the current cycle, +or when the next input line is read. +Note that if @var{filename} cannot be read, or if its end is +reached, no line is appended, without any error indication. + +As with the @code{r} command, the special value @file{/dev/stdin} +is supported for the file name, which reads a line from the +standard input. + +@item T @var{label} +@findex T (test and branch if failed) command +@cindex @value{SSEDEXT}, branch if @code{s///} failed +@cindex Branch to a label, if @code{s///} failed +@cindex Conditional branch +Branch to @var{label} only if there have been no successful +@code{s}ubstitutions since the last input line was read or +conditional branch was taken. The @var{label} may be omitted, +in which case the next cycle is started. + +@item v @var{version} +@findex v (version) command +@cindex @value{SSEDEXT}, checking for their presence +@cindex Requiring @value{SSED} +This command does nothing, but makes @command{sed} fail if +@value{SSED} extensions are not supported, simply because other +versions of @command{sed} do not implement it. In addition, you +can specify the version of @command{sed} that your script +requires, such as @code{4.0.5}. 
The default is @code{4.0} +because that is the first version that implemented this command. + +This command enables all @value{SSEDEXT} even if +@env{POSIXLY_CORRECT} is set in the environment. + +@item W @var{filename} +@findex W (write first line) command +@cindex Write first line to a file +@cindex @value{SSEDEXT}, writing first line to a file +Write to the given filename the portion of the pattern space up to +the first newline. Everything said under the @code{w} command about +file handling holds here too. + +@item z +@findex z (Zap) command +@cindex @value{SSEDEXT}, emptying pattern space +@cindex Emptying pattern space +This command empties the content of pattern space. It is +usually the same as @samp{s/.*//}, but is more efficient +and works in the presence of invalid multibyte sequences +in the input stream. @sc{posix} mandates that such sequences +are @emph{not} matched by @samp{.}, so that there is no portable +way to clear @command{sed}'s buffers in the middle of the +script in most multibyte locales (including UTF-8 locales). +@end table + +@node Escapes +@section @acronym{GNU} Extensions for Escapes in Regular Expressions + +@cindex @acronym{GNU} extensions, special escapes +Until this chapter, we have only encountered escapes of the form +@samp{\^}, which tell @command{sed} not to interpret the circumflex +as a special character, but rather to take it literally. For +example, @samp{\*} matches a single asterisk rather than zero +or more backslashes. + +@cindex @code{POSIXLY_CORRECT} behavior, escapes +This chapter introduces another kind of escape@footnote{All +the escapes introduced here are @acronym{GNU} +extensions, with the exception of @code{\n}. In basic regular +expression mode, setting @code{POSIXLY_CORRECT} disables them inside +bracket expressions.}---that +is, escapes that are applied to a character or sequence of characters +that ordinarily are taken literally, and that @command{sed} replaces +with a special character. 
This provides a way +of encoding non-printable characters in patterns in a visible manner. +There is no restriction on the appearance of non-printing characters +in a @command{sed} script but when a script is being prepared in the +shell or by text editing, it is usually easier to use one of +the following escape sequences than the binary character it +represents: + +The list of these escapes is: + +@table @code +@item \a +Produces or matches a @sc{bel} character, that is an ``alert'' (@sc{ascii} 7). + +@item \f +Produces or matches a form feed (@sc{ascii} 12). + +@item \n +Produces or matches a newline (@sc{ascii} 10). + +@item \r +Produces or matches a carriage return (@sc{ascii} 13). + +@item \t +Produces or matches a horizontal tab (@sc{ascii} 9). + +@item \v +Produces or matches a so called ``vertical tab'' (@sc{ascii} 11). + +@item \c@var{x} +Produces or matches @kbd{@sc{Control}-@var{x}}, where @var{x} is +any character. The precise effect of @samp{\c@var{x}} is as follows: +if @var{x} is a lower case letter, it is converted to upper case. +Then bit 6 of the character (hex 40) is inverted. Thus @samp{\cz} becomes +hex 1A, but @samp{\c@{} becomes hex 3B, while @samp{\c;} becomes hex 7B. + +@item \d@var{xxx} +Produces or matches a character whose decimal @sc{ascii} value is @var{xxx}. + +@item \o@var{xxx} +@ifset PERL +@item \@var{xxx} +@end ifset +Produces or matches a character whose octal @sc{ascii} value is @var{xxx}. +@ifset PERL +The syntax without the @code{o} is active in Perl mode, while the one +with the @code{o} is active in the normal or extended @sc{posix} regular +expression modes. +@end ifset + +@item \x@var{xx} +Produces or matches a character whose hexadecimal @sc{ascii} value is @var{xx}. +@end table + +@samp{\b} (backspace) was omitted because of the conflict with +the existing ``word boundary'' meaning. 
+ +Other escapes match a particular character class and are valid only in +regular expressions: + +@table @code +@item \w +Matches any ``word'' character. A ``word'' character is any +letter or digit or the underscore character. + +@item \W +Matches any ``non-word'' character. + +@item \b +Matches a word boundary; that is it matches if the character +to the left is a ``word'' character and the character to the +right is a ``non-word'' character, or vice-versa. + +@item \B +Matches everywhere but on a word boundary; that is it matches +if the character to the left and the character to the right +are either both ``word'' characters or both ``non-word'' +characters. + +@item \` +Matches only at the start of pattern space. This is different +from @code{^} in multi-line mode. + +@item \' +Matches only at the end of pattern space. This is different +from @code{$} in multi-line mode. + +@ifset PERL +@item \G +Match only at the start of pattern space or, when doing a global +substitution using the @code{s///g} command and option, at +the end-of-match position of the prior match. For example, +@samp{s/\Ga/Z/g} will change an initial run of @code{a}s to +a run of @code{Z}s +@end ifset +@end table + +@node Examples +@chapter Some Sample Scripts + +Here are some @command{sed} scripts to guide you in the art of mastering +@command{sed}. 
+ +@menu +Some exotic examples: +* Centering lines:: +* Increment a number:: +* Rename files to lower case:: +* Print bash environment:: +* Reverse chars of lines:: + +Emulating standard utilities: +* tac:: Reverse lines of files +* cat -n:: Numbering lines +* cat -b:: Numbering non-blank lines +* wc -c:: Counting chars +* wc -w:: Counting words +* wc -l:: Counting lines +* head:: Printing the first lines +* tail:: Printing the last lines +* uniq:: Make duplicate lines unique +* uniq -d:: Print duplicated lines of input +* uniq -u:: Remove all duplicated lines +* cat -s:: Squeezing blank lines +@end menu + +@node Centering lines +@section Centering Lines + +This script centers all lines of a file on an 80-column width. +To change that width, the number in @code{\@{@dots{}\@}} must be +replaced, and the number of added spaces also must be changed. + +Note how the buffer commands are used to separate parts in +the regular expressions to be matched---this is a common +technique. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -f + +@group +# Put 80 spaces in the buffer +1 @{ + x + s/^$/ / + s/^.*$/&&&&&&&&/ + x +@} +@end group + +@group +# del leading and trailing spaces +y/@kbd{tab}/ / +s/^ *// +s/ *$// +@end group + +@group +# add a newline and 80 spaces to end of line +G +@end group + +@group +# keep first 81 chars (80 + a newline) +s/^\(.\@{81\@}\).*$/\1/ +@end group + +@group +# \2 matches half of the spaces, which are moved to the beginning +s/^\(.*\)\n\(.*\)\2/\2\1/ +@end group +@end example +@c end--------------------------------------------- + +@node Increment a number +@section Increment a Number + +This script is one of a few that demonstrate how to do arithmetic +in @command{sed}. This is indeed possible,@footnote{@command{sed} guru Greg +Ubben wrote an implementation of the @command{dc} @sc{rpn} calculator! +It is distributed together with sed.} but must be done manually.
+ +To increment one number you just add 1 to last digit, replacing +it by the following digit. There is one exception: when the digit +is a nine the previous digits must be also incremented until you +don't have a nine. + +This solution by Bruno Haible is very clever and smart because +it uses a single buffer; if you don't have this limitation, the +algorithm used in @ref{cat -n, Numbering lines}, is faster. +It works by replacing trailing nines with an underscore, then +using multiple @code{s} commands to increment the last digit, +and then again substituting underscores with zeros. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -f + +/[^0-9]/ d + +@group +# replace all trailing 9s by _ (any other character except digits, could +# be used) +:d +s/9\(_*\)$/_\1/ +td +@end group + +@group +# incr last digit only. The first line adds a most-significant +# digit of 1 if we have to add a digit. +@end group + +@group +s/^\(_*\)$/1\1/; tn +s/8\(_*\)$/9\1/; tn +s/7\(_*\)$/8\1/; tn +s/6\(_*\)$/7\1/; tn +s/5\(_*\)$/6\1/; tn +s/4\(_*\)$/5\1/; tn +s/3\(_*\)$/4\1/; tn +s/2\(_*\)$/3\1/; tn +s/1\(_*\)$/2\1/; tn +s/0\(_*\)$/1\1/; tn +@end group + +@group +:n +y/_/0/ +@end group +@end example +@c end--------------------------------------------- + +@node Rename files to lower case +@section Rename Files to Lower Case + +This is a pretty strange use of @command{sed}. We transform text, and +transform it to be shell commands, then just feed them to shell. +Don't worry, even worse hacks are done when using @command{sed}; I have +seen a script converting the output of @command{date} into a @command{bc} +program! + +The main body of this is the @command{sed} script, which remaps the name +from lower to upper (or vice-versa) and even checks out +if the remapped name is the same as the original name. +Note how the script is parameterized using shell +variables and proper quoting. + +@c start------------------------------------------- +@example +@group +#! 
/bin/sh +# rename files to lower/upper case... +# +# usage: +# move-to-lower * +# move-to-upper * +# or +# move-to-lower -R . +# move-to-upper -R . +# +@end group + +@group +help() +@{ + cat << eof +Usage: $0 [-n] [-r] [-h] files... +@end group + +@group +-n do nothing, only see what would be done +-R recursive (use find) +-h this message +files files to remap to lower case +@end group + +@group +Examples: + $0 -n * (see if everything is ok, then...) + $0 * +@end group + + $0 -R . + +@group +eof +@} +@end group + +@group +apply_cmd='sh' +finder='echo "$@@" | tr " " "\n"' +files_only= +@end group + +@group +while : +do + case "$1" in + -n) apply_cmd='cat' ;; + -R) finder='find "$@@" -type f';; + -h) help ; exit 1 ;; + *) break ;; + esac + shift +done +@end group + +@group +if [ -z "$1" ]; then + echo Usage: $0 [-h] [-n] [-r] files... + exit 1 +fi +@end group + +@group +LOWER='abcdefghijklmnopqrstuvwxyz' +UPPER='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +@end group + +@group +case `basename $0` in + *upper*) TO=$UPPER; FROM=$LOWER ;; + *) FROM=$UPPER; TO=$LOWER ;; +esac +@end group + +eval $finder | sed -n ' + +@group +# remove all trailing slashes +s/\/*$// +@end group + +@group +# add ./ if there is no path, only a filename +/\//! 
s/^/.\// +@end group + +@group +# save path+filename +h +@end group + +@group +# remove path +s/.*\/// +@end group + +@group +# do conversion only on filename +y/'$FROM'/'$TO'/ +@end group + +@group +# now line contains original path+file, while +# hold space contains the new filename +x +@end group + +@group +# add converted file name to line, which now contains +# path/file-name\nconverted-file-name +G +@end group + +@group +# check if converted file name is equal to original file name, +# if it is, do not print anything +/^.*\/\(.*\)\n\1/b +@end group + +@group +# escape special characters for the shell +s/["$`\\]/\\&/g +@end group + +@group +# now, transform path/fromfile\n, into +# mv path/fromfile path/tofile and print it +s/^\(.*\/\)\(.*\)\n\(.*\)$/mv "\1\2" "\1\3"/p +@end group + +' | $apply_cmd +@end example +@c end--------------------------------------------- + +@node Print bash environment +@section Print @command{bash} Environment + +This script strips the definition of the shell functions +from the output of the @command{set} Bourne-shell command. + +@c start------------------------------------------- +@example +#!/bin/sh + +@group +set | sed -n ' +:x +@end group + +@group +@ifinfo +# if no occurrence of "=()" print and load next line +@end ifinfo +@ifnotinfo +# if no occurrence of @samp{=()} print and load next line +@end ifnotinfo +/=()/! @{ p; b; @} +/ () $/! @{ p; b; @} +@end group + +@group +# possible start of functions section +# save the line in case this is a var like FOO="() " +h +@end group + +@group +# if the next line has a brace, we quit because +# nothing comes after functions +n +/^@{/ q +@end group + +@group +# print the old line +x; p +@end group + +@group +# work on the new line now +x; bx +' +@end group +@end example +@c end--------------------------------------------- + +@node Reverse chars of lines +@section Reverse Characters of Lines + +This script can be used to reverse the position of characters +in lines. 
The technique moves two characters at a time, hence +it is faster than more intuitive implementations. + +Note the @code{tx} command before the definition of the label. +This is often needed to reset the flag that is tested by +the @code{t} command. + +Imaginative readers will find uses for this script. An example +is reversing the output of @command{banner}.@footnote{This requires +another script to pad the output of banner; for example + +@example +#! /bin/sh + +banner -w $1 $2 $3 $4 | + sed -e :a -e '/^.\@{0,'$1'\@}$/ @{ s/$/ /; ba; @}' | + ~/sedscripts/reverseline.sed +@end example +} + +@c start------------------------------------------- +@example +#!/usr/bin/sed -f + +/../! b + +@group +# Reverse a line. Begin embedding the line between two newlines +s/^.*$/\ +&\ +/ +@end group + +@group +# Move first character at the end. The regexp matches until +# there are zero or one characters between the markers +tx +:x +s/\(\n.\)\(.*\)\(.\n\)/\3\2\1/ +tx +@end group + +@group +# Remove the newline markers +s/\n//g +@end group +@end example +@c end--------------------------------------------- + +@node tac +@section Reverse Lines of Files + +This one begins a series of totally useless (yet interesting) +scripts emulating various Unix commands. This, in particular, +is a @command{tac} workalike. + +Note that on implementations other than @acronym{GNU} @command{sed} +@ifset PERL +and @value{SSED} +@end ifset +this script might easily overflow internal buffers. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf + +# reverse all lines of input, i.e. first line became last, ... + +@group +# from the second line, the buffer (which contains all previous lines) +# is *appended* to current line, so, the order will be reversed +1! 
G +@end group + +@group +# on the last line we're done -- print everything +$ p +@end group + +@group +# store everything on the buffer again +h +@end group +@end example +@c end--------------------------------------------- + +@node cat -n +@section Numbering Lines + +This script replaces @samp{cat -n}; in fact it formats its output +exactly like @acronym{GNU} @command{cat} does. + +Of course this is completely useless and for two reasons: first, +because somebody else did it in C, second, because the following +Bourne-shell script could be used for the same purpose and would +be much faster: + +@c start------------------------------------------- +@example +@group +#! /bin/sh +sed -e "=" $@@ | sed -e ' + s/^/ / + N + s/^ *\(......\)\n/\1 / +' +@end group +@end example +@c end--------------------------------------------- + +It uses @command{sed} to print the line number, then groups lines two +by two using @code{N}. Of course, this script does not teach as much as +the one presented below. + +The algorithm used for incrementing uses both buffers, so the line +is printed as soon as possible and then discarded. The number +is split so that changing digits go in a buffer and unchanged ones go +in the other; the changed digits are modified in a single step +(using a @code{y} command). The line number for the next line +is then composed and stored in the hold space, to be used in the +next iteration. 
+ +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf + +@group +# Prime the pump on the first line +x +/^$/ s/^.*$/1/ +@end group + +@group +# Add the correct line number before the pattern +G +h +@end group + +@group +# Format it and print it +s/^/ / +s/^ *\(......\)\n/\1 /p +@end group + +@group +# Get the line number from hold space; add a zero +# if we're going to add a digit on the next line +g +s/\n.*$// +/^9*$/ s/^/0/ +@end group + +@group +# separate changing/unchanged digits with an x +s/.9*$/x&/ +@end group + +@group +# keep changing digits in hold space +h +s/^.*x// +y/0123456789/1234567890/ +x +@end group + +@group +# keep unchanged digits in pattern space +s/x.*$// +@end group + +@group +# compose the new number, remove the newline implicitly added by G +G +s/\n// +h +@end group +@end example +@c end--------------------------------------------- + +@node cat -b +@section Numbering Non-blank Lines + +Emulating @samp{cat -b} is almost the same as @samp{cat -n}---we only +have to select which lines are to be numbered and which are not. + +The part that is common to this script and the previous one is +not commented to show how important it is to comment @command{sed} +scripts properly... + +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf + +@group +/^$/ @{ + p + b +@} +@end group + +@group +# Same as cat -n from now +x +/^$/ s/^.*$/1/ +G +h +s/^/ / +s/^ *\(......\)\n/\1 /p +x +s/\n.*$// +/^9*$/ s/^/0/ +s/.9*$/x&/ +h +s/^.*x// +y/0123456789/1234567890/ +x +s/x.*$// +G +s/\n// +h +@end group +@end example +@c end--------------------------------------------- + +@node wc -c +@section Counting Characters + +This script shows another way to do arithmetic with @command{sed}. +In this case we have to add possibly large numbers, so implementing +this by successive increments would not be feasible (and possibly +even more complicated to contrive than this script). 
+ +The approach is to map numbers to letters, kind of an abacus +implemented with @command{sed}. @samp{a}s are units, @samp{b}s are +tens and so on: we simply add the number of characters +on the current line as units, and then propagate the carry +to tens, hundreds, and so on. + +As usual, running totals are kept in hold space. + +On the last line, we convert the abacus form back to decimal. +For the sake of variety, this is done with a loop rather than +with some 80 @code{s} commands@footnote{Some implementations +have a limit of 199 commands per script}: first we +convert units, removing @samp{a}s from the number; then we +rotate letters so that tens become @samp{a}s, and so on +until no more letters remain. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf + +@group +# Add n+1 a's to hold space (+1 is for the newline) +s/./a/g +H +x +s/\n/a/ +@end group + +@group +# Do the carry. The t's and b's are not necessary, +# but they do speed up the thing +t a +: a; s/aaaaaaaaaa/b/g; t b; b done +: b; s/bbbbbbbbbb/c/g; t c; b done +: c; s/cccccccccc/d/g; t d; b done +: d; s/dddddddddd/e/g; t e; b done +: e; s/eeeeeeeeee/f/g; t f; b done +: f; s/ffffffffff/g/g; t g; b done +: g; s/gggggggggg/h/g; t h; b done +: h; s/hhhhhhhhhh//g +@end group + +@group +: done +$! @{ + h + b +@} +@end group + +# On the last line, convert back to decimal + +@group +: loop +/a/! s/[b-h]*/&0/ +s/aaaaaaaaa/9/ +s/aaaaaaaa/8/ +s/aaaaaaa/7/ +s/aaaaaa/6/ +s/aaaaa/5/ +s/aaaa/4/ +s/aaa/3/ +s/aa/2/ +s/a/1/ +@end group + +@group +: next +y/bcdefgh/abcdefg/ +/[a-h]/ b loop +p +@end group +@end example +@c end--------------------------------------------- + +@node wc -w +@section Counting Words + +This script is almost the same as the previous one, once each +of the words on the line is converted to a single @samp{a} +(in the previous script each letter was changed to an @samp{a}). 
+ +It is interesting that real @command{wc} programs have optimized +loops for @samp{wc -c}, so they are much slower at counting +words rather than characters. This script's bottleneck, +instead, is arithmetic, and hence the word-counting one +is faster (it has to manage smaller numbers). + +Again, the common parts are not commented to show the importance +of commenting @command{sed} scripts. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf + +@group +# Convert words to a's +s/[ @kbd{tab}][ @kbd{tab}]*/ /g +s/^/ / +s/ [^ ][^ ]*/a /g +s/ //g +@end group + +@group +# Append them to hold space +H +x +s/\n// +@end group + +@group +# From here on it is the same as in wc -c. +/aaaaaaaaaa/! bx; s/aaaaaaaaaa/b/g +/bbbbbbbbbb/! bx; s/bbbbbbbbbb/c/g +/cccccccccc/! bx; s/cccccccccc/d/g +/dddddddddd/! bx; s/dddddddddd/e/g +/eeeeeeeeee/! bx; s/eeeeeeeeee/f/g +/ffffffffff/! bx; s/ffffffffff/g/g +/gggggggggg/! bx; s/gggggggggg/h/g +s/hhhhhhhhhh//g +:x +$! @{ h; b; @} +:y +/a/! s/[b-h]*/&0/ +s/aaaaaaaaa/9/ +s/aaaaaaaa/8/ +s/aaaaaaa/7/ +s/aaaaaa/6/ +s/aaaaa/5/ +s/aaaa/4/ +s/aaa/3/ +s/aa/2/ +s/a/1/ +y/bcdefgh/abcdefg/ +/[a-h]/ by +p +@end group +@end example +@c end--------------------------------------------- + +@node wc -l +@section Counting Lines + +No strange things are done now, because @command{sed} gives us +@samp{wc -l} functionality for free!!! Look: + +@c start------------------------------------------- +@example +@group +#!/usr/bin/sed -nf +$= +@end group +@end example +@c end--------------------------------------------- + +@node head +@section Printing the First Lines + +This script is probably the simplest useful @command{sed} script. +It displays the first 10 lines of input; the number of displayed +lines is right before the @code{q} command. 
+ +@c start------------------------------------------- +@example +@group +#!/usr/bin/sed -f +10q +@end group +@end example +@c end--------------------------------------------- + +@node tail +@section Printing the Last Lines + +Printing the last @var{n} lines rather than the first is more complex +but indeed possible. @var{n} is encoded in the second line, before +the bang character. + +This script is similar to the @command{tac} script in that it keeps the +final output in the hold space and prints it at the end: + +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf + +@group +1! @{; H; g; @} +1,10 !s/[^\n]*\n// +$p +h +@end group +@end example +@c end--------------------------------------------- + +Mainly, the script keeps a window of 10 lines and slides it +by adding a line and deleting the oldest (the substitution command +on the second line works like a @code{D} command but does not +restart the loop). + +The ``sliding window'' technique is a very powerful way to write +efficient and complex @command{sed} scripts, because commands like +@code{P} would require a lot of work if implemented manually. + +To introduce the technique, which is fully demonstrated in the +rest of this chapter and is based on the @code{N}, @code{P} +and @code{D} commands, here is an implementation of @command{tail} +using a simple ``sliding window.'' + +This looks complicated but in fact the working is the same as +the last script: after we have kicked in the appropriate number +of lines, however, we stop using the hold space to keep inter-line +state, and instead use @code{N} and @code{D} to slide pattern +space by one line: + +@c start------------------------------------------- +@example +#!/usr/bin/sed -f + +@group +1h +2,10 @{; H; g; @} +$q +1,9d +N +D +@end group +@end example +@c end--------------------------------------------- + +Note how the first, second and fourth line are inactive after +the first ten lines of input.
After that, all the script does +is: exiting on the last line of input, appending the next input +line to pattern space, and removing the first line. + +@node uniq +@section Make Duplicate Lines Unique + +This is an example of the art of using the @code{N}, @code{P} +and @code{D} commands, probably the most difficult to master. + +@c start------------------------------------------- +@example +@group +#!/usr/bin/sed -f +h +@end group + +@group +:b +# On the last line, print and exit +$b +N +/^\(.*\)\n\1$/ @{ + # The two lines are identical. Undo the effect of + # the N command. + g + bb +@} +@end group + +@group +# If the @code{N} command had added the last line, print and exit +$b +@end group + +@group +# The lines are different; print the first and go +# back working on the second. +P +D +@end group +@end example +@c end--------------------------------------------- + +As you can see, we maintain a 2-line window using @code{P} and @code{D}. +This technique is often used in advanced @command{sed} scripts. + +@node uniq -d +@section Print Duplicated Lines of Input + +This script prints only duplicated lines, like @samp{uniq -d}. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf + +@group +$b +N +/^\(.*\)\n\1$/ @{ + # Print the first of the duplicated lines + s/.*\n// + p +@end group + +@group + # Loop until we get a different line + :b + $b + N + /^\(.*\)\n\1$/ @{ + s/.*\n// + bb + @} +@} +@end group + +@group +# The last line cannot be followed by duplicates +$b +@end group + +@group +# Found a different one. Leave it alone in the pattern space +# and go back to the top, hunting its duplicates +D +@end group +@end example +@c end--------------------------------------------- + +@node uniq -u +@section Remove All Duplicated Lines + +This script prints only unique lines, like @samp{uniq -u}.
+ +@c start------------------------------------------- +@example +#!/usr/bin/sed -f + +@group +# Search for a duplicate line --- until that, print what you find. +$b +N +/^\(.*\)\n\1$/ ! @{ + P + D +@} +@end group + +@group +:c +# Got two equal lines in pattern space. At the +# end of the file we simply exit +$d +@end group + +@group +# Else, we keep reading lines with @code{N} until we +# find a different one +s/.*\n// +N +/^\(.*\)\n\1$/ @{ + bc +@} +@end group + +@group +# Remove the last instance of the duplicate line +# and go back to the top +D +@end group +@end example +@c end--------------------------------------------- + +@node cat -s +@section Squeezing Blank Lines + +As a final example, here are three scripts, of increasing complexity +and speed, that implement the same function as @samp{cat -s}, that is +squeezing blank lines. + +The first leaves a blank line at the beginning and end if there are +some already. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -f + +@group +# on empty lines, join with next +# Note there is a star in the regexp +:x +/^\n*$/ @{ +N +bx +@} +@end group + +@group +# now, squeeze all '\n', this can be also done by: +# s/^\(\n\)*/\1/ +s/\n*/\ +/ +@end group +@end example +@c end--------------------------------------------- + +This one is a bit more complex and removes all empty lines +at the beginning. It does leave a single blank line at end +if one was there. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -f + +@group +# delete all leading empty lines +1,/^./@{ +/./!d +@} +@end group + +@group +# on an empty line we remove it and all the following +# empty lines, but one +:x +/./!@{ +N +s/^\n$// +tx +@} +@end group +@end example +@c end--------------------------------------------- + +This removes leading and trailing blank lines. It is also the +fastest. 
Note that loops are completely done with @code{n} and +@code{b}, without relying on @command{sed} to restart +the script automatically at the end of a line. + +@c start------------------------------------------- +@example +#!/usr/bin/sed -nf + +@group +# delete all (leading) blanks +/./!d +@end group + +@group +# get here: so there is a non empty +:x +# print it +p +# get next +n +# got chars? print it again, etc... +/./bx +@end group + +@group +# no, don't have chars: got an empty line +:z +# get next, if last line we finish here so no trailing +# empty lines are written +n +# also empty? then ignore it, and get next... this will +# remove ALL empty lines +/./!bz +@end group + +@group +# all empty lines were deleted/ignored, but we have a non empty. As +# what we want to do is to squeeze, insert a blank line artificially +i\ +@end group + +bx +@end example +@c end--------------------------------------------- + +@node Limitations +@chapter @value{SSED}'s Limitations and Non-limitations + +@cindex @acronym{GNU} extensions, unlimited line length +@cindex Portability, line length limitations +For those who want to write portable @command{sed} scripts, +be aware that some implementations have been known to +limit line lengths (for the pattern and hold spaces) +to be no more than 4000 bytes. +The @sc{posix} standard specifies that conforming @command{sed} +implementations shall support at least 8192 byte line lengths. +@value{SSED} has no built-in limit on line length; +as long as it can @code{malloc()} more (virtual) memory, +you can feed or construct lines as long as you like. + +However, recursion is used to handle subpatterns and indefinite +repetition. This means that the available stack space may limit +the size of the buffer that can be processed by certain patterns. + +@ifset PERL +There are some size limitations in the regular expression +matcher but it is hoped that they will never in practice +be relevant.
The maximum length of a compiled pattern +is 65539 (sic) bytes. All values in repeating quantifiers +must be less than 65536. The maximum nesting depth of +all parenthesized subpatterns, including capturing and +non-capturing subpatterns@footnote{The +distinction is meaningful when referring to Perl-style +regular expressions.}, assertions, and other types of +subpattern, is 200. + +Also, @value{SSED} recognizes the @sc{posix} syntax +@code{[.@var{ch}.]} and @code{[=@var{ch}=]} +where @var{ch} is a ``collating element'', but these +are not supported, and an error is given if they are +encountered. + +Here are a few distinctions between the real Perl-style +regular expressions and those that @option{-R} recognizes. + +@enumerate +@item +Lookahead assertions do not allow repeat quantifiers after them. +Perl permits them, but they do not mean what you +might think. For example, @samp{(?!a)@{3@}} does not assert that the +next three characters are not @samp{a}. It just asserts three times that the +next character is not @samp{a} --- a waste of time and nothing else. + +@item +Capturing subpatterns that occur inside negative lookahead +assertions are counted, but their entries are counted +as empty in the second half of an @code{s} command. +Perl sets its numerical variables from any such patterns +that are matched before the assertion fails to match +something (thereby succeeding), but only if the negative +lookahead assertion contains just one branch. + +@item +The following Perl escape sequences are not supported: +@samp{\l}, @samp{\u}, @samp{\L}, @samp{\U}, @samp{\E}, +@samp{\Q}. In fact these are implemented by Perl's general +string-handling and are not part of its pattern matching engine. + +@item +The Perl @samp{\G} assertion is not supported as it is not +relevant to single pattern matches. + +@item +Fairly obviously, @value{SSED} does not support the @samp{(?@{code@})} +and @samp{(?p@{code@})} constructions.
However, there is some experimental +support for recursive patterns using the non-Perl item @samp{(?R)}. + +@item +There are at the time of writing some oddities in Perl +5.005_02 concerned with the settings of captured strings +when part of a pattern is repeated. For example, matching +@samp{aba} against the pattern @samp{/^(a(b)?)+$/} sets +@samp{$2}@footnote{@samp{$2} would be @samp{\2} in @value{SSED}.} +to the value @samp{b}, but matching @samp{aabbaa} +against @samp{/^(aa(bb)?)+$/} leaves @samp{$2} +unset. However, if the pattern is changed to +@samp{/^(aa(b(b))?)+$/} then @samp{$2} (and @samp{$3}) are set. +In Perl 5.004 @samp{$2} is set in both cases, and that is also +true of @value{SSED}. + +@item +Another as yet unresolved discrepancy is that in Perl +5.005_02 the pattern @samp{/^(a)?(?(1)a|b)+$/} matches +the string @samp{a}, whereas in @value{SSED} it does not. +However, in both Perl and @value{SSED} @samp{/^(a)?a/} matched +against @samp{a} leaves $1 unset. +@end enumerate +@end ifset + +@node Other Resources +@chapter Other Resources for Learning About @command{sed} + +@cindex Additional reading about @command{sed} +In addition to several books that have been written about @command{sed} +(either specifically or as chapters in books which discuss +shell programming), one can find out more about @command{sed} +(including suggestions of a few books) from the FAQ +for the @code{sed-users} mailing list, available from: +@display +@uref{http://sed.sourceforge.net/sedfaq.html} +@end display + +Also of interest are +@uref{http://www.student.northpark.edu/pemente/sed/index.htm} +and @uref{http://sed.sf.net/grabbag}, +which include @command{sed} tutorials and other @command{sed}-related goodies. + +The @code{sed-users} mailing list itself is maintained by Sven Guckes. +To subscribe, visit @uref{http://groups.yahoo.com} and search +for the @code{sed-users} mailing list. 
+ +@node Reporting Bugs +@chapter Reporting Bugs + +@cindex Bugs, reporting +Email bug reports to @email{bug-sed@@gnu.org}. +Also, please include the output of @samp{sed --version} in the body +of your report if at all possible. + +Please do not send a bug report like this: + +@example +@i{@i{@r{while building frobme-1.3.4}}} +$ configure +@error{} sed: file sedscr line 1: Unknown option to 's' +@end example + +If @value{SSED} doesn't configure your favorite package, take a +few extra minutes to identify the specific problem and make a stand-alone +test case. Unlike other programs such as C compilers, making such test +cases for @command{sed} is quite simple. + +A stand-alone test case includes all the data necessary to perform the +test, and the specific invocation of @command{sed} that causes the problem. +The smaller a stand-alone test case is, the better. A test case should +not involve something as far removed from @command{sed} as ``try to configure +frobme-1.3.4''. Yes, that is in principle enough information to look +for the bug, but that is not a very practical prospect. + +Here are a few commonly reported bugs that are not bugs. + +@table @asis +@item @code{N} command on the last line +@cindex Portability, @code{N} command on the last line +@cindex Non-bugs, @code{N} command on the last line + +Most versions of @command{sed} exit without printing anything when +the @command{N} command is issued on the last line of a file. +@value{SSED} prints pattern space before exiting unless of course +the @command{-n} command switch has been specified. This choice is +by design. + +For example, the behavior of +@example +sed N foo bar +@end example +@noindent +would depend on whether foo has an even or an odd number of +lines@footnote{which is the actual ``bug'' that prompted the +change in behavior}. 
Or, when writing a script to read the +next few lines following a pattern match, traditional +implementations of @code{sed} would force you to write +something like +@example +/foo/@{ $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N @} +@end example +@noindent +instead of just +@example +/foo/@{ N;N;N;N;N;N;N;N;N; @} +@end example + +@cindex @code{POSIXLY_CORRECT} behavior, @code{N} command +In any case, the simplest workaround is to use @code{$d;N} in +scripts that rely on the traditional behavior, or to set +the @code{POSIXLY_CORRECT} variable to a non-empty value. + +@item Regex syntax clashes (problems with backslashes) +@cindex @acronym{GNU} extensions, to basic regular expressions +@cindex Non-bugs, regex syntax clashes +@command{sed} uses the @sc{posix} basic regular expression syntax. According to +the standard, the meaning of some escape sequences is undefined in +this syntax; notable in the case of @command{sed} are @code{\|}, +@code{\+}, @code{\?}, @code{\`}, @code{\'}, @code{\<}, +@code{\>}, @code{\b}, @code{\B}, @code{\w}, and @code{\W}. + +As in all @acronym{GNU} programs that use @sc{posix} basic regular +expressions, @command{sed} interprets these escape sequences as special +characters. So, @code{x\+} matches one or more occurrences of @samp{x}. +@code{abc\|def} matches either @samp{abc} or @samp{def}. + +This syntax may cause problems when running scripts written for other +@command{sed}s. Some @command{sed} programs have been written with the +assumption that @code{\|} and @code{\+} match the literal characters +@code{|} and @code{+}. Such scripts must be modified by removing the +spurious backslashes if they are to be used with modern implementations +of @command{sed}, like +@ifset PERL +@value{SSED} or +@end ifset +@acronym{GNU} @command{sed}. + +On the other hand, some scripts use s|abc\|def||g to remove occurrences +of @emph{either} @code{abc} or @code{def}. 
While this worked until +@command{sed} 4.0.x, newer versions interpret this as removing the +string @code{abc|def}. This is again undefined behavior according to +@acronym{POSIX}, and this interpretation is arguably more robust: older +@command{sed}s, for example, required that the regex matcher parsed +@code{\/} as @code{/} in the common case of escaping a slash, which is +again undefined behavior; the new behavior avoids this, and this is good +because the regex matcher is only partially under our control. + +@cindex @acronym{GNU} extensions, special escapes +In addition, this version of @command{sed} supports several escape characters +(some of which are multi-character) to insert non-printable characters +in scripts (@code{\a}, @code{\c}, @code{\d}, @code{\o}, @code{\r}, +@code{\t}, @code{\v}, @code{\x}). These can cause similar problems +with scripts written for other @command{sed}s. + +@item @option{-i} clobbers read-only files +@cindex In-place editing +@cindex @value{SSEDEXT}, in-place editing +@cindex Non-bugs, in-place editing + +In short, @samp{sed -i} will let you delete the contents of +a read-only file, and in general the @option{-i} option +(@pxref{Invoking sed, , Invocation}) lets you clobber +protected files. This is not a bug, but rather a consequence +of how the Unix filesystem works. + +The permissions on a file say what can happen to the data +in that file, while the permissions on a directory say what can +happen to the list of files in that directory. @samp{sed -i} +will not ever open for writing a file that is already on disk. +Rather, it will work on a temporary file that is finally renamed +to the original name: if you rename or delete files, you're actually +modifying the contents of the directory, so the operation depends on +the permissions of the directory, not of the file. 
For this same +reason, @command{sed} does not let you use @option{-i} on a writeable file +in a read-only directory, and will break hard or symbolic links when +@option{-i} is used on such a file. + +@item @code{0a} does not work (gives an error) +@cindex @code{0} address +@cindex @acronym{GNU} extensions, @code{0} address +@cindex Non-bugs, @code{0} address + +There is no line 0. 0 is a special address that is only used to treat +addresses like @code{0,/@var{RE}/} as active when the script starts: if +you write @code{1,/abc/d} and the first line includes the word @samp{abc}, +then that match would be ignored because address ranges must span at least +two lines (barring the end of the file); but what you probably wanted is +to delete every line up to the first one including @samp{abc}, and this +is obtained with @code{0,/abc/d}. + +@ifclear PERL +@item @code{[a-z]} is case insensitive +@cindex Non-bugs, localization-related + +You are encountering problems with locales. POSIX mandates that @code{[a-z]} +uses the current locale's collation order -- in C parlance, that means using +@code{strcoll(3)} instead of @code{strcmp(3)}. Some locales have a +case-insensitive collation order, others don't. + +Another problem is that @code{[a-z]} tries to use collation symbols. +This only happens if you are on the @acronym{GNU} system, using +@acronym{GNU} libc's regular expression matcher instead of compiling the +one supplied with @acronym{GNU} sed. In a Danish locale, for example, +the regular expression @code{^[a-z]$} matches the string @samp{aa}, +because this is a single collating symbol that comes after @samp{a} +and before @samp{b}; @samp{ll} behaves similarly in Spanish +locales, or @samp{ij} in Dutch locales. + +To work around these problems, which may cause bugs in shell scripts, set +the @env{LC_COLLATE} and @env{LC_CTYPE} environment variables to @samp{C}. 
+ +@item @code{s/.*//} does not clear pattern space +@cindex Non-bugs, localization-related +@cindex @value{SSEDEXT}, emptying pattern space +@cindex Emptying pattern space + +This happens if your input stream includes invalid multibyte +sequences. @sc{posix} mandates that such sequences +are @emph{not} matched by @samp{.}, so that @samp{s/.*//} will not clear +pattern space as you would expect. In fact, there is no way to clear +sed's buffers in the middle of the script in most multibyte locales +(including UTF-8 locales). For this reason, @value{SSED} provides a `z' +command (for `zap') as an extension. + +To work around these problems, which may cause bugs in shell scripts, set +the @env{LC_COLLATE} and @env{LC_CTYPE} environment variables to @samp{C}. +@end ifclear +@end table + + +@node Extended regexps +@appendix Extended regular expressions +@cindex Extended regular expressions, syntax + +The only difference between basic and extended regular expressions is in +the behavior of a few characters: @samp{?}, @samp{+}, parentheses, +braces (@samp{@{@}}), and @samp{|}. While basic regular expressions +require these to be escaped if you want them to behave as special +characters, when using extended regular expressions you must escape +them if you want them @emph{to match a literal character}. @samp{|} +is special here because @samp{\|} is a GNU extension -- standard +basic regular expressions do not provide its functionality. + +@noindent +Examples: +@table @code +@item abc? +becomes @samp{abc\?} when using extended regular expressions. It matches +the literal string @samp{abc?}. + +@item c\+ +becomes @samp{c+} when using extended regular expressions. It matches +one or more @samp{c}s. + +@item a\@{3,\@} +becomes @samp{a@{3,@}} when using extended regular expressions. It matches +three or more @samp{a}s. + +@item \(abc\)\@{2,3\@} +becomes @samp{(abc)@{2,3@}} when using extended regular expressions. It +matches either @samp{abcabc} or @samp{abcabcabc}. 
+ +@item \(abc*\)\1 +becomes @samp{(abc*)\1} when using extended regular expressions. +Backreferences must still be escaped when using extended regular +expressions. +@end table + +@ifset PERL +@node Perl regexps +@appendix Perl-style regular expressions +@cindex Perl-style regular expressions, syntax + +@emph{This part is taken from the @file{pcre.txt} file distributed together +with the free @sc{pcre} regular expression matcher; it was written by Philip Hazel.} + +Perl introduced several extensions to regular expressions, some +of them incompatible with the syntax of regular expressions +accepted by Emacs and other @acronym{GNU} tools (whose matcher was +based on the Emacs matcher). @value{SSED} implements +both kinds of extensions. + +@iftex +Summarizing, we have: + +@itemize @bullet +@item +A backslash can introduce several special sequences + +@item +The circumflex, dollar sign, and period characters behave specially +with regard to new lines + +@item +Strange uses of square brackets are parsed differently + +@item +You can toggle modifiers in the middle of a regular expression + +@item +You can specify that a subpattern does not count when numbering backreferences + +@item +@cindex Greedy regular expression matching +You can specify greedy or non-greedy matching + +@item +You can have more than ten back references + +@item +You can do complex look aheads and look behinds (in the spirit of +@code{\b}, but with subpatterns). + +@item +You can often improve performance by avoiding that @command{sed} wastes +time with backtracking + +@item +You can have if/then/else branches + +@item +You can do recursive matches, for example to look for unbalanced parentheses + +@item +You can have comments and non-significant whitespace, because things can +get complex... +@end itemize + +Most of these extensions are introduced by the special @code{(?} +sequence, which gives special meanings to parenthesized groups. 
+@end iftex +@menu +Other extensions can be roughly subdivided in two categories. +On one hand Perl introduces several more escaped sequences +(that is, sequences introduced by a backslash). On the other +hand, it specifies that if a question mark follows an open +parenthesis it should give a special meaning to the parenthesized +group. + +* Backslash:: Introduces special sequences +* Circumflex/dollar sign/period:: Behave specially with regard to new lines +* Square brackets:: Are a bit different in strange cases +* Options setting:: Toggle modifiers in the middle of a regexp +* Non-capturing subpatterns:: Are not counted when backreferencing +* Repetition:: Allows for non-greedy matching +* Backreferences:: Allows for more than 10 back references +* Assertions:: Allows for complex look ahead matches +* Non-backtracking subpatterns:: Often gives more performance +* Conditional subpatterns:: Allows if/then/else branches +* Recursive patterns:: For example to match parentheses +* Comments:: Because things can get complex... +@end menu + +@node Backslash +@appendixsec Backslash +@cindex Perl-style regular expressions, escaped sequences + +There are a few differences in the handling of backslashed +sequences in Perl mode. + +First of all, there are no @code{\o} and @code{\d} sequences. +@sc{ascii} values for characters can be specified in octal +with a @code{\@var{xxx}} sequence, where @var{xxx} is a +sequence of up to three octal digits. If the first digit +is a zero, the treatment of the sequence is straightforward; +just note that if the character that follows the escaped digit +is itself an octal digit, you have to supply three octal digits +for @var{xxx}. For example @code{\07} is a @sc{bel} character +rather than a @sc{nul} and a literal @code{7} (this sequence is +instead represented by @code{\0007}). + +@cindex Perl-style regular expressions, backreferences +The handling of a backslash followed by a digit other than 0 +is complicated. 
Outside a character class, @command{sed} reads it +and any following digits as a decimal number. If the number +is less than 10, or if there have been at least that many +previous capturing left parentheses in the expression, the +entire sequence is taken as a back reference. A description +of how this works is given later, following the discussion +of parenthesized subpatterns. + +Inside a character class, or if the decimal number is +greater than 9 and there have not been that many capturing +subpatterns, @command{sed} re-reads up to three octal digits following +the backslash, and generates a single byte from the +least significant 8 bits of the value. Any subsequent digits +stand for themselves. For example: + +@example +\040 @i{@r{is another way of writing a space}} +\40 @i{@r{is the same, provided there are fewer than 40}} + @i{@r{previous capturing subpatterns}} +\7 @i{@r{is always a back reference}} +\011 @i{@r{is always a tab}} +\11 @i{@r{might be a back reference, or another way of writing a tab}} +\0113 @i{@r{is a tab followed by the character @samp{3}}} +\113 @i{@r{is the character with octal code 113 (since there}} + @i{@r{can be no more than 99 back references)}} +\377 @i{@r{is a byte consisting entirely of 1 bits (@sc{ascii} 255)}} +\81 @i{@r{is either a back reference, or a binary zero}} + @i{@r{followed by the two characters @samp{81}}} +@end example + +Note that octal values of 100 or greater must not be introduced +by a leading zero, because no more than three octal +digits are ever read. Note that this applies only to the LHS +pattern; it is not possible yet to specify more than 9 backreferences +on the RHS of the `s' command. + +All the sequences that define a single byte value can be +used both inside and outside character classes. In addition, +inside a character class, the sequence @code{\b} is interpreted +as the backspace character (hex 08). Outside a character +class it has a different meaning (see below). 
+ +In addition, there are four additional escapes specifying +generic character classes (like @code{\w} and @code{\W} do): + +@cindex Perl-style regular expressions, character classes +@table @samp +@item \d +Matches any decimal digit + +@item \D +Matches any character that is not a decimal digit +@end table + +In Perl mode, these character type sequences can appear both inside and +outside character classes. Instead, in @sc{posix} mode these sequences +(as well as @code{\w} and @code{\W}) are treated as two literal characters +(a backslash and a letter) inside square brackets. + +Escaped sequences specifying assertions are also different in +Perl mode. An assertion specifies a condition that has to be met +at a particular point in a match, without consuming any +characters from the subject string. The use of subpatterns +for more complicated assertions is described below. The +backslashed assertions are + +@cindex Perl-style regular expressions, assertions +@table @samp +@item \b +Asserts that the point is at a word boundary. +A word boundary is a position in the subject string where +the current character and the previous character do not both +match @code{\w} or @code{\W} (i.e. one matches @code{\w} and +the other matches @code{\W}), or the start or end of the string +if the first or last character matches @code{\w}, respectively. + +@item \B +Asserts that the point is not at a word boundary. + +@item \A +Asserts the matcher is at the start of pattern space (independent +of multiline mode). + +@item \Z +Asserts the matcher is at the end of pattern space, +or at a newline before the end of pattern space (independent of +multiline mode) + +@item \z +Asserts the matcher is at the end of pattern space (independent +of multiline mode) +@end table + +These assertions may not appear in character classes (but +note that @code{\b} has a different meaning, namely the +backspace character, inside a character class). 
+Note that Perl mode does not support directly assertions +for the beginning and the end of word; the @acronym{GNU} extensions +@code{\<} and @code{\>} achieve this purpose in @sc{posix} mode +instead. + +The @code{\A}, @code{\Z}, and @code{\z} assertions differ +from the traditional circumflex and dollar sign (described below) +in that they only ever match at the very start and end of the +subject string, whatever options are set; in particular @code{\A} +and @code{\z} are the same as the @acronym{GNU} extensions +@code{\`} and @code{\'} that are active in @sc{posix} mode. + +@node Circumflex/dollar sign/period +@appendixsec Circumflex, dollar sign, period +@cindex Perl-style regular expressions, newlines + +Outside a character class, in the default matching mode, the +circumflex character is an assertion which is true only if +the current matching point is at the start of the subject +string. Inside a character class, the circumflex has an entirely +different meaning (see below). + +The circumflex need not be the first character of the pattern if +a number of alternatives are involved, but it should be the +first thing in each alternative in which it appears if the +pattern is ever to match that branch. If all possible alternatives +start with a circumflex, that is, if the pattern is +constrained to match only at the start of the subject, it is +said to be an @dfn{anchored} pattern. (There are also other constructs +that can cause a pattern to be anchored.) + +A dollar sign is an assertion which is true only if the +current matching point is at the end of the subject string, +or immediately before a newline character that is the last +character in the string (by default). A dollar sign need not be the +last character of the pattern if a number of alternatives +are involved, but it should be the last item in any branch +in which it appears. A dollar sign has no special meaning in a +character class. 
+ +@cindex Perl-style regular expressions, multiline +The meanings of the circumflex and dollar sign characters are +changed if the @code{M} modifier option is used. When this is +the case, they match immediately after and immediately +before an internal @code{\n} character, respectively, in addition +to matching at the start and end of the subject string. For +example, the pattern @code{/^abc$/} matches the subject string +@samp{def\nabc} in multiline mode, but not otherwise. Consequently, +patterns that are anchored in single line mode +because all branches start with @code{^} are not anchored in +multiline mode. + +@cindex Perl-style regular expressions, multiline +Note that the sequences @code{\A}, @code{\Z}, and @code{\z} +can be used to match the start and end of the subject in both +modes, and if all branches of a pattern start with @code{\A} +it is always anchored, whether the @code{M} modifier is set or not. + +@cindex Perl-style regular expressions, single line +Outside a character class, a dot in the pattern matches any +one character in the subject, including a non-printing character, +but not (by default) newline. If the @code{S} modifier is used, +dots match newlines as well. Actually, the handling of +dot is entirely independent of the handling of circumflex +and dollar sign, the only relationship being that they both +involve newline characters. Dot has no special meaning in a +character class. + +@node Square brackets +@appendixsec Square brackets +@cindex Perl-style regular expressions, character classes + +An opening square bracket introduces a character class, terminated +by a closing square bracket. A closing square bracket on its own +is not special. If a closing square bracket is required as a +member of the class, it should be the first data character in +the class (after an initial circumflex, if present) or escaped with a backslash. 
+ +A character class matches a single character in the subject; +the character must be in the set of characters defined by +the class, unless the first character in the class is a circumflex, +in which case the subject character must not be in +the set defined by the class. If a circumflex is actually +required as a member of the class, ensure it is not the +first character, or escape it with a backslash. + +For example, the character class [aeiou] matches any lower +case vowel, while [^aeiou] matches any character that is not +a lower case vowel. Note that a circumflex is just a convenient +notation for specifying the characters which are in +the class by enumerating those that are not. It is not an +assertion: it still consumes a character from the subject +string, and fails if the current pointer is at the end of +the string. + +@cindex Perl-style regular expressions, case-insensitive +When caseless matching is set, any letters in a class +represent both their upper case and lower case versions, so +for example, a caseless @code{[aeiou]} matches uppercase +and lowercase @samp{A}s, and a caseless @code{[^aeiou]} +does not match @samp{A}, whereas a case-sensitive version would. + +@cindex Perl-style regular expressions, single line +@cindex Perl-style regular expressions, multiline +The newline character is never treated in any special way in +character classes, whatever the setting of the @code{S} and +@code{M} options (modifiers) is. A class such as @code{[^a]} will +always match a newline. + +The minus (hyphen) character can be used to specify a range +of characters in a character class. For example, @code{[d-m]} +matches any letter between d and m, inclusive. If a minus +character is required in a class, it must be escaped with a +backslash or appear in a position where it cannot be interpreted +as indicating a range, typically as the first or last +character in the class. 
+ +It is not possible to have the literal character @code{]} as the +end character of a range. A pattern such as @code{[W-]46]} is +interpreted as a class of two characters (@code{W} and @code{-}) +followed by a literal string @code{46]}, so it would match +@samp{W46]} or @samp{-46]}. However, if the @code{]} is escaped +with a backslash it is interpreted as the end of range, so +@code{[W-\]46]} is interpreted as a single class containing a +range followed by two separate characters. The octal or +hexadecimal representation of @code{]} can also be used to end a range. + +Ranges operate in @sc{ascii} collating sequence. They can also be +used for characters specified numerically, for example +@code{[\000-\037]}. If a range that includes letters is used when +caseless matching is set, it matches the letters in either +case. For example, a caseless @code{[W-c]} is equivalent to +@code{[][\^_`wxyzabc]}, matched caselessly, and if character +tables for the French locale are in use, @code{[\xc8-\xcb]} +matches accented E characters in both cases. + +Unlike in @sc{posix} mode, the character types @code{\d}, +@code{\D}, @code{\s}, @code{\S}, @code{\w}, and @code{\W} +may also appear in a character class, and add the characters +that they match to the class. For example, @code{[\dABCDEF]} matches any +hexadecimal digit. A circumflex can conveniently be used +with the upper case character types to specify a more restricted +set of characters than the matching lower case type. +For example, the class @code{[^\W_]} matches any letter or digit, +but not underscore. + +All non-alphameric characters other than @code{\}, @code{-}, +@code{^} (at the start) and the terminating @code{]} +are non-special in character classes, but it does no harm +if they are escaped. + +Perl 5.6 supports the @sc{posix} notation for character classes, which +uses names enclosed by @code{[:} and @code{:]} within the enclosing +square brackets, and @value{SSED} supports this notation as well. 
+For example, + +@example +[01[:alpha:]%] +@end example + +@noindent +matches @samp{0}, @samp{1}, any alphabetic character, or @samp{%}. +The supported class names are + +@table @code +@item alnum +Matches letters and digits + +@item alpha +Matches letters + +@item ascii +Matches character codes 0 - 127 + +@item cntrl +Matches control characters + +@item digit +Matches decimal digits (same as \d) + +@item graph +Matches printing characters, excluding space + +@item lower +Matches lower case letters + +@item print +Matches printing characters, including space + +@item punct +Matches printing characters, excluding letters and digits + +@item space +Matches white space (same as \s) + +@item upper +Matches upper case letters + +@item word +Matches ``word'' characters (same as \w) + +@item xdigit +Matches hexadecimal digits +@end table + +The names @code{ascii} and @code{word} are extensions valid only in +Perl mode. Another Perl extension is negation, which is +indicated by a circumflex character after the colon. For example, + +@example +[12[:^digit:]] +@end example + +@noindent +matches @samp{1}, @samp{2}, or any non-digit. + +@node Options setting +@appendixsec Options setting +@cindex Perl-style regular expressions, toggling options +@cindex Perl-style regular expressions, case-insensitive +@cindex Perl-style regular expressions, multiline +@cindex Perl-style regular expressions, single line +@cindex Perl-style regular expressions, extended + +The settings of the @code{I}, @code{M}, @code{S}, @code{X} +modifiers can be changed from within the pattern by +a sequence of Perl option letters enclosed between @code{(?} +and @code{)}. The option letters must be lowercase. + +For example, @code{(?im)} sets caseless, multiline matching. 
It is +also possible to unset these options by preceding the letter +with a hyphen; you can also have combined settings and unsettings: +@code{(?im-sx)} sets caseless and multiline matching, +while unsets single line matching (for dots) and extended +whitespace interpretation. If a letter appears both before +and after the hyphen, the option is unset. + +The scope of these option changes depends on where in the +pattern the setting occurs. For settings that are outside +any subpattern (defined below), the effect is the same as if +the options were set or unset at the start of matching. The +following patterns all behave in exactly the same way: + +@example +(?i)abc +a(?i)bc +ab(?i)c +abc(?i) +@end example + +which in turn is the same as specifying the pattern abc with +the @code{I} modifier. In other words, ``top level'' settings +apply to the whole pattern (unless there are other +changes inside subpatterns). If there is more than one setting +of the same option at top level, the rightmost setting +is used. + +If an option change occurs inside a subpattern, the effect +is different. This is a change of behaviour in Perl 5.005. +An option change inside a subpattern affects only that part +of the subpattern @emph{that follows} it, so + +@example +(a(?i)b)c +@end example + +@noindent +matches abc and aBc and no other strings (assuming +case-sensitive matching is used). By this means, options can +be made to have different settings in different parts of the +pattern. Any changes made in one alternative do carry on +into subsequent branches within the same subpattern. For +example, + +@example +(a(?i)b|c) +@end example + +@noindent +matches @samp{ab}, @samp{aB}, @samp{c}, and @samp{C}, +even though when matching @samp{C} the first branch is +abandoned before the option setting. +This is because the effects of option settings happen at +compile time. There would be some very weird behaviour otherwise. 
+ +@ignore +There are two PCRE-specific options PCRE_UNGREEDY and PCRE_EXTRA +that can be changed in the same way as the Perl-compatible options by +using the characters U and X respectively. The (?X) flag +setting is special in that it must always occur earlier in +the pattern than any of the additional features it turns on, +even when it is at top level. It is best put at the start. +@end ignore + + +@node Non-capturing subpatterns +@appendixsec Non-capturing subpatterns +@cindex Perl-style regular expressions, non-capturing subpatterns + +Marking part of a pattern as a subpattern does two things. +On one hand, it localizes a set of alternatives; on the other +hand, it sets up the subpattern as a capturing subpattern (as +defined above). The subpattern can be backreferenced and +referenced in the right side of @code{s} commands. + +For example, if the string @samp{the red king} is matched against +the pattern + +@example +the ((red|white) (king|queen)) +@end example + +@noindent +the captured substrings are @samp{red king}, @samp{red}, +and @samp{king}, and are numbered 1, 2, and 3. + +The fact that plain parentheses fulfil two functions is not +always helpful. There are often times when a grouping +subpattern is required without a capturing requirement. If an +opening parenthesis is followed by @code{?:}, the subpattern does +not do any capturing, and is not counted when computing the +number of any subsequent capturing subpatterns. For example, +if the string @samp{the white queen} is matched against the pattern + +@example +the ((?:red|white) (king|queen)) +@end example + +@noindent +the captured substrings are @samp{white queen} and @samp{queen}, +and are numbered 1 and 2. The maximum number of captured +substrings is 99, while the maximum number of all subpatterns, +both capturing and non-capturing, is 200. 
+ +As a convenient shorthand, if any option settings are +required at the start of a non-capturing subpattern, the +option letters may appear between the @code{?} and the +@code{:}. Thus the two patterns + +@example +(?i:saturday|sunday) +(?:(?i)saturday|sunday) +@end example + +@noindent +match exactly the same set of strings. Because alternative +branches are tried from left to right, and options are not +reset until the end of the subpattern is reached, an option +setting in one branch does affect subsequent branches, so +the above patterns match @samp{SUNDAY} as well as @samp{Saturday}. + + +@node Repetition +@appendixsec Repetition +@cindex Perl-style regular expressions, repetitions + +Repetition is specified by quantifiers, which can follow any +of the following items: + +@itemize @bullet +@item +a single character, possibly escaped + +@item +the @code{.} special character + +@item +a character class + +@item +a back reference (see next section) + +@item +a parenthesized subpattern (unless it is an assertion; @pxref{Assertions}) +@end itemize + +The general repetition quantifier specifies a minimum and +maximum number of permitted matches, by giving the two +numbers in curly brackets (braces), separated by a comma. +The numbers must be less than 65536, and the first must be +less than or equal to the second. For example: + +@example +z@{2,4@} +@end example + +@noindent +matches @samp{zz}, @samp{zzz}, or @samp{zzzz}. A closing brace on its own +is not a special character. If the second number is omitted, +but the comma is present, there is no upper limit; if the +second number and the comma are both omitted, the quantifier +specifies an exact number of required matches. Thus + +@example +[aeiou]@{3,@} +@end example + +@noindent +matches at least 3 successive vowels, but may match many +more, while + +@example +\d@{8@} +@end example + +@noindent +matches exactly 8 digits. 
An opening curly bracket that +appears in a position where a quantifier is not allowed, or +one that does not match the syntax of a quantifier, is taken +as a literal character. For example, @{,6@} is not a quantifier, +but a literal string of four characters.@footnote{It +raises an error if @option{-R} is not used.} + +The quantifier @samp{@{0@}} is permitted, causing the expression to +behave as if the previous item and the quantifier were not +present. + +For convenience (and historical compatibility) the three +most common quantifiers have single-character abbreviations: + +@table @code +@item * +is equivalent to @{0,@} + +@item + +is equivalent to @{1,@} + +@item ? +is equivalent to @{0,1@} +@end table + +It is possible to construct infinite loops by following a +subpattern that can match no characters with a quantifier +that has no upper limit, for example: + +@example +(a?)* +@end example + +Earlier versions of Perl used to give an error at +compile time for such patterns. However, because there are +cases where this can be useful, such patterns are now +accepted, but if any repetition of the subpattern does in +fact match no characters, the loop is forcibly broken. + +@cindex Greedy regular expression matching +@cindex Perl-style regular expressions, stingy repetitions +By default, the quantifiers are @dfn{greedy} like in @sc{posix} +mode, that is, they match as much as possible (up to the maximum +number of permitted times), without causing the rest of the +pattern to fail. The classic example of where this gives problems +is in trying to match comments in C programs. These appear between +the sequences @code{/*} and @code{*/} and within the sequence, individual +@code{*} and @code{/} characters may appear. 
An attempt to match C +comments by applying the pattern + +@example +/\*.*\*/ +@end example + +@noindent +to the string + +@example +/* first comment */ not comment /* second comment */ +@end example + +@noindent + +fails, because it matches the entire string owing to the +greediness of the @code{.*} item. + +However, if a quantifier is followed by a question mark, it +ceases to be greedy, and instead matches the minimum number +of times possible, so the pattern @code{/\*.*?\*/} +does the right thing with the C comments. The meaning of the +various quantifiers is not otherwise changed, just the preferred +number of matches. Do not confuse this use of question +mark with its use as a quantifier in its own right. +Because it has two uses, it can sometimes appear doubled, as in + +@example +\d??\d +@end example + +which matches one digit by preference, but can match two if +that is the only way the rest of the pattern matches. + +Note that greediness does not matter when specifying addresses, +but can be nevertheless used to improve performance. + +@ignore +If the PCRE_UNGREEDY option is set (an option which is not +available in Perl), the quantifiers are not greedy by +default, but individual ones can be made greedy by following +them with a question mark. In other words, it inverts the +default behaviour. +@end ignore + +When a parenthesized subpattern is quantified with a minimum +repeat count that is greater than 1 or with a limited maximum, +more store is required for the compiled pattern, in +proportion to the size of the minimum or maximum. + +@cindex Perl-style regular expressions, single line +If a pattern starts with @code{.*} or @code{.@{0,@}} and the +@code{S} modifier is used, the pattern is implicitly anchored, +because whatever follows will be tried against every character +position in the subject string, so there is no point in +retrying the overall match at any position after the first. +PCRE treats such a pattern as though it were preceded by \A. 
+ +When a capturing subpattern is repeated, the value captured +is the substring that matched the final iteration. For example, +after + +@example +(tweedle[dume]@{3@}\s*)+ +@end example + +@noindent +has matched @samp{tweedledum tweedledee} the value of the +captured substring is @samp{tweedledee}. However, if there are +nested capturing subpatterns, the corresponding captured +values may have been set in previous iterations. For example, +after + +@example +/(a|(b))+/ +@end example + +matches @samp{aba}, the value of the second captured substring is +@samp{b}. + +@node Backreferences +@appendixsec Backreferences +@cindex Perl-style regular expressions, backreferences + +Outside a character class, a backslash followed by a digit +greater than 0 (and possibly further digits) is a back +reference to a capturing subpattern earlier (i.e. to its +left) in the pattern, provided there have been that many +previous capturing left parentheses. + +However, if the decimal number following the backslash is +less than 10, it is always taken as a back reference, and +causes an error only if there are not that many capturing +left parentheses in the entire pattern. In other words, the +parentheses that are referenced need not be to the left of +the reference for numbers less than 10. @xref{Backslash}, +for further details of the handling of digits following a backslash. + +A back reference matches whatever actually matched the capturing +subpattern in the current subject string, rather than +anything matching the subpattern itself. So the pattern + +@example +(sens|respons)e and \1ibility +@end example + +@noindent +matches @samp{sense and sensibility} and @samp{response and responsibility}, +but not @samp{sense and responsibility}. If caseful +matching is in force at the time of the back reference, the +case of letters is relevant. 
For example, + +@example +((?i)blah)\s+\1 +@end example + +@noindent +matches @samp{blah blah} and @samp{Blah Blah}, but not +@samp{BLAH blah}, even though the original capturing +subpattern is matched caselessly. + +There may be more than one back reference to the same subpattern. +Also, if a subpattern has not actually been used in a +particular match, any back references to it always fail. For +example, the pattern + +@example +(a|(bc))\2 +@end example + +@noindent +always fails if it starts to match @samp{a} rather than +@samp{bc}. Because there may be up to 99 back references, all +digits following the backslash are taken as part of a potential +back reference number; this is different from what happens +in @sc{posix} mode. If the pattern continues with a digit +character, some delimiter must be used to terminate the back +reference. If the @code{X} modifier option is set, this can be +whitespace. Otherwise an empty comment can be used, or the +following character can be expressed in hexadecimal or octal. +Note that this applies only to the LHS pattern; it is +not possible yet to specify more than 9 backreferences on the +RHS of the `s' command. + +A back reference that occurs inside the parentheses to which +it refers fails when the subpattern is first used, so, for +example, @code{(a\1)} never matches. However, such references +can be useful inside repeated subpatterns. For example, the +pattern + +@example +(a|b\1)+ +@end example + +@noindent +matches any number of @samp{a}s and also @samp{aba}, @samp{ababbaa}, +etc. At each iteration of the subpattern, the back reference matches +the character string corresponding to the previous iteration. In +order for this to work, the pattern must be such that the first +iteration does not need to match the back reference. This can be +done using alternation, as in the example above, or by a +quantifier with a minimum of zero. 
+ +@node Assertions +@appendixsec Assertions +@cindex Perl-style regular expressions, assertions +@cindex Perl-style regular expressions, asserting subpatterns + +An assertion is a test on the characters following or +preceding the current matching point that does not actually +consume any characters. The simple assertions coded as @code{\b}, +@code{\B}, @code{\A}, @code{\Z}, @code{\z}, @code{^} and @code{$} +are described above. More complicated assertions are coded as +subpatterns. There are two kinds: those that look ahead of the +current position in the subject string, and those that look behind it. + +@cindex Perl-style regular expressions, lookahead subpatterns +An assertion subpattern is matched in the normal way, except +that it does not cause the current matching position to be +changed. Lookahead assertions start with @code{(?=} for positive +assertions and @code{(?!} for negative assertions. For example, + +@example +\w+(?=;) +@end example + +@noindent +matches a word followed by a semicolon, but does not include +the semicolon in the match, and + +@example +foo(?!bar) +@end example + +@noindent +matches any occurrence of @samp{foo} that is not followed by +@samp{bar}. + +Note that the apparently similar pattern + +@example +(?!foo)bar +@end example + +@noindent +@cindex Perl-style regular expressions, lookbehind subpatterns +finds any occurrence of @samp{bar} even if it is preceded by +@samp{foo}, because the assertion @code{(?!foo)} is always true +when the next three characters are @samp{bar}. A lookbehind +assertion is needed to achieve this effect. +Lookbehind assertions start with @code{(?<=} for positive +assertions and @code{(?<!} for negative assertions. So, + +@example +(?<!foo)bar +@end example + +achieves the required effect of finding an occurrence of +@samp{bar} that is not preceded by @samp{foo}. The contents of a +lookbehind assertion are restricted +such that all the strings it matches must have a fixed +length. 
However, if there are several alternatives, they do +not all have to have the same fixed length. This is an extension +compared with Perl 5.005, which requires all branches to match +the same length of string. Thus + +@example +(?<=dogs|cats|) +@end example + +@noindent +is permitted, but the apparently equivalent regular expression + +@example +(?<!dogs?|cats?) +@end example + +@noindent +causes an error at compile time. Branches that match different +length strings are permitted only at the top level of +a lookbehind assertion: an assertion such as + +@example +(?<=ab(c|de)) +@end example + +@noindent +is not permitted, because its single top-level branch can +match two different lengths, but it is acceptable if rewritten +to use two top-level branches: + +@example +(?<=abc|abde) +@end example + +All this is required because lookbehind assertions simply +move the current position back by the alternative's fixed +width and then try to match. If there are +insufficient characters before the current position, the +match is deemed to fail. Lookbehinds, in conjunction with +non-backtracking subpatterns can be particularly useful for +matching at the ends of strings; an example is given at the end +of the section on non-backtracking subpatterns. + +Several assertions (of any sort) may occur in succession. +For example, + +@example +(?<=\d@{3@})(?<!999)foo +@end example + +@noindent +matches @samp{foo} preceded by three digits that are not @samp{999}. +Notice that each of the assertions is applied independently +at the same point in the subject string. First there is a +check that the previous three characters are all digits, and +then there is a check that the same three characters are not +@samp{999}. This pattern does not match @samp{foo} preceded by six +characters, the first three of which are digits and the last three +of which are not @samp{999}. For example, it doesn't match +@samp{123abcfoo}. 
A pattern to do that is + +@example +(?<=\d@{3@}...)(?<!999)foo +@end example + +@noindent +This time the first assertion looks at the preceding six +characters, checking that the first three are digits, and +then the second assertion checks that the preceding three +characters are not @samp{999}. Actually, assertions can be +nested in any combination, so one can write this as + +@example +(?<=\d@{3@}(?!999)...)foo +@end example + +or + +@example +(?<=\d@{3@}...(?<!999))foo +@end example + +@noindent +both of which might be considered more readable. + +Assertion subpatterns are not capturing subpatterns, and may +not be repeated, because it makes no sense to assert the +same thing several times. If any kind of assertion contains +capturing subpatterns within it, these are counted for the +purposes of numbering the capturing subpatterns in the whole +pattern. However, substring capturing is carried out only +for positive assertions, because it does not make sense for +negative assertions. + +Assertions count towards the maximum of 200 parenthesized +subpatterns. + +@node Non-backtracking subpatterns +@appendixsec Non-backtracking subpatterns +@cindex Perl-style regular expressions, non-backtracking subpatterns + +With both maximizing and minimizing repetition, failure of +what follows normally causes the repeated item to be evaluated +again to see if a different number of repeats allows the +rest of the pattern to match. Sometimes it is useful to +prevent this, either to change the nature of the match, or +to cause it to fail earlier than it otherwise might, when the +author of the pattern knows there is no point in carrying +on. 
+ +Consider, for example, the pattern @code{\d+foo} when applied to +the subject line + +@example +123456bar +@end example + +After matching all 6 digits and then failing to match @samp{foo}, +the normal action of the matcher is to try again with only 5 +digits matching the @code{\d+} item, and then with 4, and so on, +before ultimately failing. Non-backtracking subpatterns +provide the means for specifying that once a portion of the +pattern has matched, it is not to be re-evaluated in this way, +so the matcher would give up immediately on failing to match +@samp{foo} the first time. The notation is another kind of special +parenthesis, starting with @code{(?>} as in this example: + +@example +(?>\d+)bar +@end example + +This kind of parenthesis ``locks up'' the part of the pattern +it contains once it has matched, and a failure further into +the pattern is prevented from backtracking into it. +Backtracking past it to previous items, however, works as +normal. + +Non-backtracking subpatterns are not capturing subpatterns. Simple +cases such as the above example can be thought of as a maximizing +repeat that must swallow everything it can. So, +while both @code{\d+} and @code{\d+?} are prepared to adjust the number of +digits they match in order to make the rest of the pattern +match, @code{(?>\d+)} can only match an entire sequence of digits. + +This construction can of course contain arbitrarily complicated +subpatterns, and it can be nested. + +@cindex Perl-style regular expressions, lookbehind subpatterns +Non-backtracking subpatterns can be used in conjunction with look-behind +assertions to specify efficient matching at the end +of the subject string. Consider a simple pattern such as + +@example +abcd$ +@end example + +@noindent +when applied to a long string which does not match. Because +matching proceeds from left to right, @command{sed} will look for +each @samp{a} in the subject and then see if what follows matches +the rest of the pattern. 
If the pattern is specified as + +@example +^.*abcd$ +@end example + +@noindent +the initial @code{.*} matches the entire string at first, but when +this fails (because there is no following @samp{a}), it backtracks +to match all but the last character, then all but the +last two characters, and so on. Once again the search for +@samp{a} covers the entire string, from right to left, so we are +no better off. However, if the pattern is written as + +@example +^(?>.*)(?<=abcd) +@end example + +there can be no backtracking for the .* item; it can match +only the entire string. The subsequent lookbehind assertion +does a single test on the last four characters. If it fails, +the match fails immediately. For long strings, this approach +makes a significant difference to the processing time. + +When a pattern contains an unlimited repeat inside a subpattern +that can itself be repeated an unlimited number of +times, the use of a once-only subpattern is the only way to +avoid some failing matches taking a very long time +indeed.@footnote{Actually, the matcher embedded in @value{SSED} +tries to do something for this in the simplest cases, +like @code{([^b]*b)*}. These cases are actually quite +common: they happen for example in a regular expression +like @code{\/\*([^*]*\*)*\/} which matches C comments.} + +The pattern + +@example +(\D+|<\d+>)*[!?] +@end example + +([^0-9<]+<(\d+>)?)*[!?] + +@noindent +matches an unlimited number of substrings that either consist +of non-digits, or digits enclosed in angular brackets, followed by +an exclamation or question mark. When it matches, it runs quickly. +However, if it is applied to + +@example +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +@end example + +@noindent +it takes a long time before reporting failure. 
This is +because the string can be divided between the two repeats in +a large number of ways, and all have to be tried.@footnote{The +example used @code{[!?]} rather than a single character at the end, +because both @value{SSED} and Perl have an optimization that allows +for fast failure when a single character is used. They +remember the last single character that is required for a +match, and fail early if it is not present in the string.} + +If the pattern is changed to + +@example +((?>\D+)|<\d+>)*[!?] +@end example + +sequences of non-digits cannot be broken, and failure happens +quickly. + +@node Conditional subpatterns +@appendixsec Conditional subpatterns +@cindex Perl-style regular expressions, conditional subpatterns + +It is possible to cause the matching process to obey a subpattern +conditionally or to choose between two alternative +subpatterns, depending on the result of an assertion, or +whether a previous capturing subpattern matched or not. The +two possible forms of conditional subpattern are + +@example +(?(@var{condition})@var{yes-pattern}) +(?(@var{condition})@var{yes-pattern}|@var{no-pattern}) +@end example + +If the condition is satisfied, the yes-pattern is used; otherwise +the no-pattern (if present) is used. If there are more than two +alternatives in the subpattern, a compile-time error occurs. + +There are two kinds of condition. If the text between the +parentheses consists of a sequence of digits, the condition +is satisfied if the capturing subpattern of that number has +previously matched. The number must be greater than zero. +Consider the following pattern, which contains non-significant +white space to make it more readable (assume the @code{X} modifier) +and to divide it into three parts for ease of discussion: + +@example +( \( )? [^()]+ (?(1) \) ) +@end example + +The first part matches an optional opening parenthesis, and +if that character is present, sets it as the first captured +substring. 
The second part matches one or more characters +that are not parentheses. The third part is a conditional +subpattern that tests whether the first set of parentheses +matched or not. If they did, that is, if the subject started +with an opening parenthesis, the condition is true, and so +the yes-pattern is executed and a closing parenthesis is +required. Otherwise, since no-pattern is not present, the +subpattern matches nothing. In other words, this pattern +matches a sequence of non-parentheses, optionally enclosed +in parentheses. + +@cindex Perl-style regular expressions, lookahead subpatterns +If the condition is not a sequence of digits, it must be an +assertion. This may be a positive or negative lookahead or +lookbehind assertion. Consider this pattern, again containing +non-significant white space, and with the two alternatives +on the second and third lines: + +@example +(?(?=...[a-z]) + \d\d-[a-z]@{3@}-\d\d | + \d\d-\d\d-\d\d ) +@end example + +The condition is a positive lookahead assertion that matches +a letter that is three characters away from the current point. +If a letter is found, the subject is matched against the first +alternative @samp{@var{dd}-@var{aaa}-@var{dd}} (where @var{aaa} are +letters and @var{dd} are digits); otherwise it is matched against +the second alternative, @samp{@var{dd}-@var{dd}-@var{dd}}. + + +@node Recursive patterns +@appendixsec Recursive patterns +@cindex Perl-style regular expressions, recursive patterns +@cindex Perl-style regular expressions, recursion + +Consider the problem of matching a string in parentheses, +allowing for unlimited nested parentheses. Without the use +of recursion, the best that can be done is to use a pattern +that matches up to some fixed depth of nesting. It is not +possible to handle an arbitrary nesting depth. Perl 5.6 has +provided an experimental facility that allows regular +expressions to recurse (amongst other things). 
It does this +by interpolating Perl code in the expression at run time, +and the code can refer to the expression itself. A Perl pattern +to solve the parentheses problem can be created like +this: + +@example +$re = qr@{\( (?: (?>[^()]+) | (?p@{$re@}) )* \)@}x; +@end example + +The @code{(?p@{...@})} item interpolates Perl code at run time, +and in this case refers recursively to the pattern in which it +appears. Obviously, @command{sed} cannot support the interpolation of +Perl code. Instead, the special item @code{(?R)} is provided for +the specific case of recursion. This pattern solves the +parentheses problem (assume the @code{X} modifier option is used +so that white space is ignored): + +@example +\( ( (?>[^()]+) | (?R) )* \) +@end example + +First it matches an opening parenthesis. Then it matches any +number of substrings which can either be a sequence of +non-parentheses, or a recursive match of the pattern itself +(i.e. a correctly parenthesized substring). Finally there is +a closing parenthesis. + +This particular example pattern contains nested unlimited +repeats, and so the use of a non-backtracking subpattern for +matching strings of non-parentheses is important when applying +the pattern to strings that do not match. For example, when +it is applied to + +@example +(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa() +@end example + +it yields a ``no match'' response quickly. However, if a +non-backtracking subpattern is not used, the match runs +for a very long time indeed because there are so many different +ways the @code{+} and @code{*} repeats can carve up the subject, +and all have to be tested before failure can be reported. + +The values set for any capturing subpatterns are those from +the outermost level of the recursion at which the subpattern +value is set. 
If the pattern above is matched against + +@example +(ab(cd)ef) +@end example + +@noindent +the value for the capturing parentheses is @samp{ef}, which is +the last value taken on at the top level. + +@node Comments +@appendixsec Comments +@cindex Perl-style regular expressions, comments + +The sequence (?# marks the start of a comment which continues +up to the next closing parenthesis. Nested parentheses +are not permitted. The characters that make up a comment +play no part in the pattern matching at all. + +@cindex Perl-style regular expressions, extended +If the @code{X} modifier option is used, an unescaped @code{#} character +outside a character class introduces a comment that continues +up to the next newline character in the pattern. +@end ifset + + +@page +@node Concept Index +@unnumbered Concept Index + +This is a general index of all issues discussed in this manual, with the +exception of the @command{sed} commands and command-line options. + +@printindex cp + +@page +@node Command and Option Index +@unnumbered Command and Option Index + +This is an alphabetical list of all @command{sed} commands and command-line +options. + +@printindex fn + +@contents +@bye + +@c XXX FIXME: the term "cycle" is never defined... diff --git a/doc/sed.x b/doc/sed.x new file mode 100644 index 0000000..433d52f --- /dev/null +++ b/doc/sed.x @@ -0,0 +1,333 @@ +.SH NAME +sed \- a Stream EDitor +.SH SYNOPSIS +.nf +sed [-V] [--version] [--help] [-n] [--quiet] [--silent] + [-l N] [--line-length=N] [-u] [--unbuffered] + [-r] [--regexp-extended] + [-e script] [--expression=script] + [-f script-file] [--file=script-file] + [script-if-no-other-script] + [file...] +.fi +[DESCRIPTION] +.ds sd \fIsed\fP +.ds Sd \fISed\fP +\*(Sd is a stream editor. +A stream editor is used to perform basic text +transformations on an input stream +(a file or input from a pipeline). 
+While in some ways similar to an editor which +permits scripted edits (such as \fIed\fP), +\*(sd works by making only one pass over the +input(s), and is consequently more efficient. +But it is \*(sd's ability to filter text in a pipeline +which particularly distinguishes it from other types of +editors. + +[COMMAND SYNOPSIS] +This is just a brief synopsis of \*(sd commands to serve as +a reminder to those who already know \*(sd; +other documentation (such as the texinfo document) +must be consulted for fuller descriptions. +.SS +Zero-address ``commands'' +.TP +.RI :\ label +Label for +.B b +and +.B t +commands. +.TP +.RI # comment +The comment extends until the next newline (or the end of a +.B -e +script fragment). +.TP +} +The closing bracket of a { } block. +.SS +Zero- or One- address commands +.TP += +Print the current line number. +.TP +a \e +.TP +.I text +Append +.IR text , +which has each embedded newline preceded by a backslash. +.TP +i \e +.TP +.I text +Insert +.IR text , +which has each embedded newline preceded by a backslash. +.TP +q [\fIexit-code\fR] +Immediately quit the \*(sd script without processing +any more input, except that if auto-print is not disabled +the current pattern space will be printed. The exit code +argument is a GNU extension. +.TP +Q [\fIexit-code\fR] +Immediately quit the \*(sd script without processing +any more input. This is a GNU extension. +.TP +.RI r\ filename +Append text read from +.IR filename . +.TP +.RI R\ filename +Append a line read from +.IR filename . +Each invocation of the command reads a line from the file. +This is a GNU extension. +.SS +Commands which accept address ranges +.TP +{ +Begin a block of commands (end with a }). +.TP +.RI b\ label +Branch to +.IR label ; +if +.I label +is omitted, branch to end of script. +.TP +c \e +.TP +.I text +Replace the selected lines with +.IR text , +which has each embedded newline preceded by a backslash. +.TP +d +Delete pattern space. +Start next cycle. 
+.TP +D +If pattern space contains no newline, start a normal new cycle as if +the d command was issued. Otherwise, delete text in the pattern +space up to the first newline, and restart cycle with the resultant +pattern space, without reading a new line of input. +.TP +h H +Copy/append pattern space to hold space. +.TP +g G +Copy/append hold space to pattern space. +.TP +l +List out the current line in a ``visually unambiguous'' form. +.TP +.RI l\ width +List out the current line in a ``visually unambiguous'' form, +breaking it at +.I width +characters. This is a GNU extension. +.TP +n N +Read/append the next line of input into the pattern space. +.TP +p +Print the current pattern space. +.TP +P +Print up to the first embedded newline of the current pattern space. +.TP +.RI s/ regexp / replacement / +Attempt to match +.I regexp +against the pattern space. +If successful, replace that portion matched +with +.IR replacement . +The +.I replacement +may contain the special character +.B & +to refer to that portion of the pattern space which matched, +and the special escapes \e1 through \e9 to refer to the +corresponding matching sub-expressions in the +.IR regexp . +.TP +.RI t\ label +If a s/// has done a successful substitution since the +last input line was read and since the last t or T +command, then branch to +.IR label ; +if +.I label +is omitted, branch to end of script. +.TP +.RI T\ label +If no s/// has done a successful substitution since the +last input line was read and since the last t or T +command, then branch to +.IR label ; +if +.I label +is omitted, branch to end of script. This is a GNU +extension. +.TP +.RI w\ filename +Write the current pattern space to +.IR filename . +.TP +.RI W\ filename +Write the first line of the current pattern space to +.IR filename . +This is a GNU extension. +.TP +x +Exchange the contents of the hold and pattern spaces. 
+.TP +.RI y/ source / dest / +Transliterate the characters in the pattern space which appear in +.I source +to the corresponding character in +.IR dest . +.SH +Addresses +\*(Sd commands can be given with no addresses, in which +case the command will be executed for all input lines; +with one address, in which case the command will only be executed +for input lines which match that address; or with two +addresses, in which case the command will be executed +for all input lines which match the inclusive range of +lines starting from the first address and continuing to +the second address. +Three things to note about address ranges: +the syntax is +.IR addr1 , addr2 +(i.e., the addresses are separated by a comma); +the line which +.I addr1 +matched will always be accepted, +even if +.I addr2 +selects an earlier line; +and if +.I addr2 +is a +.IR regexp , +it will not be tested against the line that +.I addr1 +matched. +.PP +After the address (or address-range), +and before the command, a +.B ! +may be inserted, +which specifies that the command shall only be +executed if the address (or address-range) does +.B not +match. +.PP +The following address types are supported: +.TP +.I number +Match only the specified line +.IR number +(which increments cumulatively across files, unless the +.B -s +option is specified on the command line). +.TP +.IR first ~ step +Match every +.IR step 'th +line starting with line +.IR first . +For example, ``sed -n 1~2p'' will print all the odd-numbered lines in +the input stream, and the address 2~5 will match every fifth line, +starting with the second. +.I first +can be zero; in this case, \*(sd operates as if it were equal to +.IR step . +(This is an extension.) +.TP +$ +Match the last line. +.TP +.RI / regexp / +Match lines matching the regular expression +.IR regexp . +.TP +.BI \fR\e\fPc regexp c +Match lines matching the regular expression +.IR regexp . +The +.B c +may be any character. 
+.PP +GNU \*(sd also supports some special 2-address forms: +.TP +.RI 0, addr2 +Start out in "matched first address" state, until +.I addr2 +is found. +This is similar to +.RI 1, addr2 , +except that if +.I addr2 +matches the very first line of input the +.RI 0, addr2 +form will be at the end of its range, whereas the +.RI 1, addr2 +form will still be at the beginning of its range. +This works only when +.I addr2 +is a regular expression. +.TP +.IR addr1 ,+ N +Will match +.I addr1 +and the +.I N +lines following +.IR addr1 . +.TP +.IR addr1 ,~ N +Will match +.I addr1 +and the lines following +.I addr1 +until the next line whose input line number is a multiple of +.IR N . + +[REGULAR EXPRESSIONS] +POSIX.2 BREs +.I should +be supported, but they aren't completely because of performance +problems. +The +.B \en +sequence in a regular expression matches the newline character, +and similarly for +.BR \ea , +.BR \et , +and other sequences. + +[SEE ALSO] +.BR awk (1), +.BR ed (1), +.BR grep (1), +.BR tr (1), +.BR perlre (1), +sed.info, +any of various books on \*(sd, +.na +the \*(sd FAQ (http://sed.sf.net/grabbag/tutorials/sedfaq.txt), +http://sed.sf.net/grabbag/. + +[BUGS] +.PP +E-mail bug reports to +.BR bug-sed@gnu.org . +Also, please include the output of ``sed --version'' in the body +of your report if at all possible. diff --git a/doc/stamp-vti b/doc/stamp-vti new file mode 100644 index 0000000..0a7839c --- /dev/null +++ b/doc/stamp-vti @@ -0,0 +1,4 @@ +@set UPDATED 22 December 2012 +@set UPDATED-MONTH December 2012 +@set EDITION 4.2.2 +@set VERSION 4.2.2 diff --git a/doc/version.texi b/doc/version.texi new file mode 100644 index 0000000..0a7839c --- /dev/null +++ b/doc/version.texi @@ -0,0 +1,4 @@ +@set UPDATED 22 December 2012 +@set UPDATED-MONTH December 2012 +@set EDITION 4.2.2 +@set VERSION 4.2.2 |