diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-01-20 10:55:18 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-01-20 10:55:18 +0000 |
commit | 70e9163c9c18e995515598085cb824e554eb7ae7 (patch) | |
tree | a42dc8b2a6c031354bf31472de888bfc8a060132 /lib/quotearg.c | |
parent | cbf5993c43f49281173f185863577d86bfac6eae (diff) | |
download | coreutils-tarball-master.tar.gz |
coreutils-8.25HEADcoreutils-8.25master
Diffstat (limited to 'lib/quotearg.c')
-rw-r--r-- | lib/quotearg.c | 1008 |
1 files changed, 671 insertions, 337 deletions
diff --git a/lib/quotearg.c b/lib/quotearg.c index f7f326a..73a5084 100644 --- a/lib/quotearg.c +++ b/lib/quotearg.c @@ -1,12 +1,11 @@ /* quotearg.c - quote arguments for output - Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007 Free - Software Foundation, Inc. + Copyright (C) 1998-2002, 2004-2016 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or modify + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -14,16 +13,25 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* Written by Paul Eggert <eggert@twinsun.com> */ +/* Without this pragma, gcc 4.7.0 20111124 mistakenly suggests that + the quoting_options_from_style function might be candidate for + attribute 'pure' */ +#if (__GNUC__ == 4 && 6 <= __GNUC_MINOR__) || 4 < __GNUC__ +# pragma GCC diagnostic ignored "-Wsuggest-attribute=pure" +#endif + #include <config.h> #include "quotearg.h" +#include "quote.h" #include "xalloc.h" +#include "c-strcaseeq.h" +#include "localcharset.h" #include <ctype.h> #include <errno.h> @@ -38,23 +46,6 @@ #define _(msgid) gettext (msgid) #define N_(msgid) msgid -#if !HAVE_MBRTOWC -/* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the - other macros are defined only for documentation and to satisfy C - syntax. */ -# undef MB_CUR_MAX -# define MB_CUR_MAX 1 -# undef mbstate_t -# define mbstate_t int -# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0) -# define iswprint(wc) isprint ((unsigned char) (wc)) -# undef HAVE_MBSINIT -#endif - -#if !defined mbsinit && !HAVE_MBSINIT -# define mbsinit(ps) 1 -#endif - #ifndef SIZE_MAX # define SIZE_MAX ((size_t) -1) #endif @@ -66,9 +57,18 @@ struct quoting_options /* Basic quoting style. */ enum quoting_style style; + /* Additional flags. Bitwise combination of enum quoting_flags. */ + int flags; + /* Quote the characters indicated by this bit vector even if the quoting style would not normally require them to be quoted. */ unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1]; + + /* The left quote for custom_quoting_style. */ + char const *left_quote; + + /* The right quote for custom_quoting_style. */ + char const *right_quote; }; /* Names of quoting styles. */ @@ -77,7 +77,10 @@ char const *const quoting_style_args[] = "literal", "shell", "shell-always", + "shell-escape", + "shell-escape-always", "c", + "c-maybe", "escape", "locale", "clocale", @@ -90,7 +93,10 @@ enum quoting_style const quoting_style_vals[] = literal_quoting_style, shell_quoting_style, shell_always_quoting_style, + shell_escape_quoting_style, + shell_escape_always_quoting_style, c_quoting_style, + c_maybe_quoting_style, escape_quoting_style, locale_quoting_style, clocale_quoting_style @@ -107,14 +113,14 @@ clone_quoting_options (struct quoting_options *o) { int e = errno; struct quoting_options *p = xmemdup (o ? o : &default_quoting_options, - sizeof *o); + sizeof *o); errno = e; return p; } /* Get the value of O's quoting style. If O is null, use the default. */ enum quoting_style -get_quoting_style (struct quoting_options *o) +get_quoting_style (struct quoting_options const *o) { return (o ? o : &default_quoting_options)->style; } @@ -144,20 +150,88 @@ set_char_quoting (struct quoting_options *o, char c, int i) return r; } +/* In O (or in the default if O is null), + set the value of the quoting options flag to I, which can be a + bitwise combination of enum quoting_flags, or 0 for default + behavior. Return the old value. */ +int +set_quoting_flags (struct quoting_options *o, int i) +{ + int r; + if (!o) + o = &default_quoting_options; + r = o->flags; + o->flags = i; + return r; +} + +void +set_custom_quoting (struct quoting_options *o, + char const *left_quote, char const *right_quote) +{ + if (!o) + o = &default_quoting_options; + o->style = custom_quoting_style; + if (!left_quote || !right_quote) + abort (); + o->left_quote = left_quote; + o->right_quote = right_quote; +} + +/* Return quoting options for STYLE, with no extra quoting. */ +static struct quoting_options /* NOT PURE!! */ +quoting_options_from_style (enum quoting_style style) +{ + struct quoting_options o = { literal_quoting_style, 0, { 0 }, NULL, NULL }; + if (style == custom_quoting_style) + abort (); + o.style = style; + return o; +} + /* MSGID approximates a quotation mark. Return its translation if it - has one; otherwise, return either it or "\"", depending on S. */ + has one; otherwise, return either it or "\"", depending on S. + + S is either clocale_quoting_style or locale_quoting_style. */ static char const * gettext_quote (char const *msgid, enum quoting_style s) { char const *translation = _(msgid); - if (translation == msgid && s == clocale_quoting_style) - translation = "\""; - return translation; + char const *locale_code; + + if (translation != msgid) + return translation; + + /* For UTF-8 and GB-18030, use single quotes U+2018 and U+2019. + Here is a list of other locales that include U+2018 and U+2019: + + ISO-8859-7 0xA1 KOI8-T 0x91 + CP869 0x8B CP874 0x91 + CP932 0x81 0x65 CP936 0xA1 0xAE + CP949 0xA1 0xAE CP950 0xA1 0xA5 + CP1250 0x91 CP1251 0x91 + CP1252 0x91 CP1253 0x91 + CP1254 0x91 CP1255 0x91 + CP1256 0x91 CP1257 0x91 + EUC-JP 0xA1 0xC6 EUC-KR 0xA1 0xAE + EUC-TW 0xA1 0xE4 BIG5 0xA1 0xA5 + BIG5-HKSCS 0xA1 0xA5 EUC-CN 0xA1 0xAE + GBK 0xA1 0xAE Georgian-PS 0x91 + PT154 0x91 + + None of these is still in wide use; using iconv is overkill. */ + locale_code = locale_charset (); + if (STRCASEEQ (locale_code, "UTF-8", 'U','T','F','-','8',0,0,0,0)) + return msgid[0] == '`' ? "\xe2\x80\x98": "\xe2\x80\x99"; + if (STRCASEEQ (locale_code, "GB18030", 'G','B','1','8','0','3','0',0,0)) + return msgid[0] == '`' ? "\xa1\ae": "\xa1\xaf"; + + return (s == clocale_quoting_style ? "\"" : "'"); } /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of - argument ARG (of size ARGSIZE), using QUOTING_STYLE and the - non-quoting-style part of O to control quoting. + argument ARG (of size ARGSIZE), using QUOTING_STYLE, FLAGS, and + QUOTE_THESE_TOO to control quoting. Terminate the output with a null character, and return the written size of the output, not counting the terminating null. If BUFFERSIZE is too small to store the output string, return the @@ -165,14 +239,16 @@ gettext_quote (char const *msgid, enum quoting_style s) If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG, - ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting - style specified by O, and O may not be null. */ + ARGSIZE, O), except it breaks O into its component pieces and is + not careful about errno. */ static size_t quotearg_buffer_restyled (char *buffer, size_t buffersize, - char const *arg, size_t argsize, - enum quoting_style quoting_style, - struct quoting_options const *o) + char const *arg, size_t argsize, + enum quoting_style quoting_style, int flags, + unsigned int const *quote_these_too, + char const *left_quote, + char const *right_quote) { size_t i; size_t len = 0; @@ -180,20 +256,57 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, size_t quote_string_len = 0; bool backslash_escapes = false; bool unibyte_locale = MB_CUR_MAX == 1; + bool elide_outer_quotes = (flags & QA_ELIDE_OUTER_QUOTES) != 0; + bool pending_shell_escape_end = false; #define STORE(c) \ do \ { \ - if (len < buffersize) \ - buffer[len] = (c); \ - len++; \ + if (len < buffersize) \ + buffer[len] = (c); \ + len++; \ + } \ + while (0) + +#define START_ESC() \ + do \ + { \ + if (elide_outer_quotes) \ + goto force_outer_quoting_style; \ + escaping = true; \ + if (quoting_style == shell_always_quoting_style \ + && ! pending_shell_escape_end) \ + { \ + STORE ('\''); \ + STORE ('$'); \ + STORE ('\''); \ + pending_shell_escape_end = true; \ + } \ + STORE ('\\'); \ + } \ + while (0) + +#define END_ESC() \ + do \ + { \ + if (pending_shell_escape_end && ! escaping) \ + { \ + STORE ('\''); \ + STORE ('\''); \ + pending_shell_escape_end = false; \ + } \ } \ while (0) switch (quoting_style) { + case c_maybe_quoting_style: + quoting_style = c_quoting_style; + elide_outer_quotes = true; + /* Fall through. */ case c_quoting_style: - STORE ('"'); + if (!elide_outer_quotes) + STORE ('"'); backslash_escapes = true; quote_string = "\""; quote_string_len = 1; @@ -201,301 +314,388 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, case escape_quoting_style: backslash_escapes = true; + elide_outer_quotes = false; break; case locale_quoting_style: case clocale_quoting_style: + case custom_quoting_style: { - /* TRANSLATORS: - Get translations for open and closing quotation marks. - - The message catalog should translate "`" to a left - quotation mark suitable for the locale, and similarly for - "'". If the catalog has no translation, - locale_quoting_style quotes `like this', and - clocale_quoting_style quotes "like this". - - For example, an American English Unicode locale should - translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and - should translate "'" to U+201D (RIGHT DOUBLE QUOTATION - MARK). A British English Unicode locale should instead - translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and - U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. - - If you don't know what to put here, please see - <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs> - and use glyphs suitable for your language. */ - - char const *left = gettext_quote (N_("`"), quoting_style); - char const *right = gettext_quote (N_("'"), quoting_style); - for (quote_string = left; *quote_string; quote_string++) - STORE (*quote_string); - backslash_escapes = true; - quote_string = right; - quote_string_len = strlen (quote_string); + if (quoting_style != custom_quoting_style) + { + /* TRANSLATORS: + Get translations for open and closing quotation marks. + The message catalog should translate "`" to a left + quotation mark suitable for the locale, and similarly for + "'". For example, a French Unicode local should translate + these to U+00AB (LEFT-POINTING DOUBLE ANGLE + QUOTATION MARK), and U+00BB (RIGHT-POINTING DOUBLE ANGLE + QUOTATION MARK), respectively. + + If the catalog has no translation, we will try to + use Unicode U+2018 (LEFT SINGLE QUOTATION MARK) and + Unicode U+2019 (RIGHT SINGLE QUOTATION MARK). If the + current locale is not Unicode, locale_quoting_style + will quote 'like this', and clocale_quoting_style will + quote "like this". You should always include translations + for "`" and "'" even if U+2018 and U+2019 are appropriate + for your locale. + + If you don't know what to put here, please see + <http://en.wikipedia.org/wiki/Quotation_marks_in_other_languages> + and use glyphs suitable for your language. */ + left_quote = gettext_quote (N_("`"), quoting_style); + right_quote = gettext_quote (N_("'"), quoting_style); + } + if (!elide_outer_quotes) + for (quote_string = left_quote; *quote_string; quote_string++) + STORE (*quote_string); + backslash_escapes = true; + quote_string = right_quote; + quote_string_len = strlen (quote_string); } break; + case shell_escape_quoting_style: + backslash_escapes = true; + /* Fall through. */ + case shell_quoting_style: + elide_outer_quotes = true; + /* Fall through. */ + case shell_escape_always_quoting_style: + if (!elide_outer_quotes) + backslash_escapes = true; + /* Fall through. */ case shell_always_quoting_style: - STORE ('\''); + quoting_style = shell_always_quoting_style; + if (!elide_outer_quotes) + STORE ('\''); quote_string = "'"; quote_string_len = 1; break; - default: + case literal_quoting_style: + elide_outer_quotes = false; break; + + default: + abort (); } for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++) { unsigned char c; unsigned char esc; + bool is_right_quote = false; + bool escaping = false; if (backslash_escapes - && quote_string_len - && i + quote_string_len <= argsize - && memcmp (arg + i, quote_string, quote_string_len) == 0) - STORE ('\\'); + && quoting_style != shell_always_quoting_style + && quote_string_len + && (i + quote_string_len + <= (argsize == SIZE_MAX && 1 < quote_string_len + /* Use strlen only if we must: when argsize is SIZE_MAX, + and when the quote string is more than 1 byte long. + If we do call strlen, save the result. */ + ? (argsize = strlen (arg)) : argsize)) + && memcmp (arg + i, quote_string, quote_string_len) == 0) + { + if (elide_outer_quotes) + goto force_outer_quoting_style; + is_right_quote = true; + } c = arg[i]; switch (c) - { - case '\0': - if (backslash_escapes) - { - STORE ('\\'); - STORE ('0'); - STORE ('0'); - c = '0'; - } - break; - - case '?': - switch (quoting_style) - { - case shell_quoting_style: - goto use_shell_always_quoting_style; - - case c_quoting_style: - if (i + 2 < argsize && arg[i + 1] == '?') - switch (arg[i + 2]) - { - case '!': case '\'': - case '(': case ')': case '-': case '/': - case '<': case '=': case '>': - /* Escape the second '?' in what would otherwise be - a trigraph. */ - c = arg[i + 2]; - i += 2; - STORE ('?'); - STORE ('\\'); - STORE ('?'); - break; - - default: - break; - } - break; - - default: - break; - } - break; - - case '\a': esc = 'a'; goto c_escape; - case '\b': esc = 'b'; goto c_escape; - case '\f': esc = 'f'; goto c_escape; - case '\n': esc = 'n'; goto c_and_shell_escape; - case '\r': esc = 'r'; goto c_and_shell_escape; - case '\t': esc = 't'; goto c_and_shell_escape; - case '\v': esc = 'v'; goto c_escape; - case '\\': esc = c; goto c_and_shell_escape; - - c_and_shell_escape: - if (quoting_style == shell_quoting_style) - goto use_shell_always_quoting_style; - c_escape: - if (backslash_escapes) - { - c = esc; - goto store_escape; - } - break; - - case '{': case '}': /* sometimes special if isolated */ - if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1)) - break; - /* Fall through. */ - case '#': case '~': - if (i != 0) - break; - /* Fall through. */ - case ' ': - case '!': /* special in bash */ - case '"': case '$': case '&': - case '(': case ')': case '*': case ';': - case '<': - case '=': /* sometimes special in 0th or (with "set -k") later args */ - case '>': case '[': - case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ - case '`': case '|': - /* A shell special character. In theory, '$' and '`' could - be the first bytes of multibyte characters, which means - we should check them with mbrtowc, but in practice this - doesn't happen so it's not worth worrying about. */ - if (quoting_style == shell_quoting_style) - goto use_shell_always_quoting_style; - break; - - case '\'': - switch (quoting_style) - { - case shell_quoting_style: - goto use_shell_always_quoting_style; - - case shell_always_quoting_style: - STORE ('\''); - STORE ('\\'); - STORE ('\''); - break; - - default: - break; - } - break; - - case '%': case '+': case ',': case '-': case '.': case '/': - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': case ':': - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': - case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': - case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': - case 'o': case 'p': case 'q': case 'r': case 's': case 't': - case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': - /* These characters don't cause problems, no matter what the - quoting style is. They cannot start multibyte sequences. */ - break; - - default: - /* If we have a multibyte sequence, copy it until we reach - its end, find an error, or come back to the initial shift - state. For C-like styles, if the sequence has - unprintable characters, escape the whole sequence, since - we can't easily escape single characters within it. */ - { - /* Length of multibyte sequence found so far. */ - size_t m; - - bool printable; - - if (unibyte_locale) - { - m = 1; - printable = isprint (c) != 0; - } - else - { - mbstate_t mbstate; - memset (&mbstate, 0, sizeof mbstate); - - m = 0; - printable = true; - if (argsize == SIZE_MAX) - argsize = strlen (arg); - - do - { - wchar_t w; - size_t bytes = mbrtowc (&w, &arg[i + m], - argsize - (i + m), &mbstate); - if (bytes == 0) - break; - else if (bytes == (size_t) -1) - { - printable = false; - break; - } - else if (bytes == (size_t) -2) - { - printable = false; - while (i + m < argsize && arg[i + m]) - m++; - break; - } - else - { - /* Work around a bug with older shells that "see" a '\' - that is really the 2nd byte of a multibyte character. - In practice the problem is limited to ASCII - chars >= '@' that are shell special chars. */ - if ('[' == 0x5b && quoting_style == shell_quoting_style) - { - size_t j; - for (j = 1; j < bytes; j++) - switch (arg[i + m + j]) - { - case '[': case '\\': case '^': - case '`': case '|': - goto use_shell_always_quoting_style; - - default: - break; - } - } - - if (! iswprint (w)) - printable = false; - m += bytes; - } - } - while (! mbsinit (&mbstate)); - } - - if (1 < m || (backslash_escapes && ! printable)) - { - /* Output a multibyte sequence, or an escaped - unprintable unibyte character. */ - size_t ilim = i + m; - - for (;;) - { - if (backslash_escapes && ! printable) - { - STORE ('\\'); - STORE ('0' + (c >> 6)); - STORE ('0' + ((c >> 3) & 7)); - c = '0' + (c & 7); - } - if (ilim <= i + 1) - break; - STORE (c); - c = arg[++i]; - } - - goto store_c; - } - } - } - - if (! (backslash_escapes - && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))) - goto store_c; + { + case '\0': + if (backslash_escapes) + { + START_ESC (); + /* If quote_string were to begin with digits, we'd need to + test for the end of the arg as well. However, it's + hard to imagine any locale that would use digits in + quotes, and set_custom_quoting is documented not to + accept them. Use only a single \0 with shell-escape + as currently digits are not printed within $'...' */ + if (quoting_style != shell_always_quoting_style + && i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9') + { + STORE ('0'); + STORE ('0'); + } + c = '0'; + /* We don't have to worry that this last '0' will be + backslash-escaped because, again, quote_string should + not start with it and because quote_these_too is + documented as not accepting it. */ + } + else if (flags & QA_ELIDE_NULL_BYTES) + continue; + break; + + case '?': + switch (quoting_style) + { + case shell_always_quoting_style: + if (elide_outer_quotes) + goto force_outer_quoting_style; + break; + + case c_quoting_style: + if ((flags & QA_SPLIT_TRIGRAPHS) + && i + 2 < argsize && arg[i + 1] == '?') + switch (arg[i + 2]) + { + case '!': case '\'': + case '(': case ')': case '-': case '/': + case '<': case '=': case '>': + /* Escape the second '?' in what would otherwise be + a trigraph. */ + if (elide_outer_quotes) + goto force_outer_quoting_style; + c = arg[i + 2]; + i += 2; + STORE ('?'); + STORE ('"'); + STORE ('"'); + STORE ('?'); + break; + + default: + break; + } + break; + + default: + break; + } + break; + + case '\a': esc = 'a'; goto c_escape; + case '\b': esc = 'b'; goto c_escape; + case '\f': esc = 'f'; goto c_escape; + case '\n': esc = 'n'; goto c_and_shell_escape; + case '\r': esc = 'r'; goto c_and_shell_escape; + case '\t': esc = 't'; goto c_and_shell_escape; + case '\v': esc = 'v'; goto c_escape; + case '\\': esc = c; + /* Never need to escape '\' in shell case. */ + if (quoting_style == shell_always_quoting_style) + { + if (elide_outer_quotes) + goto force_outer_quoting_style; + goto store_c; + } + + /* No need to escape the escape if we are trying to elide + outer quotes and nothing else is problematic. */ + if (backslash_escapes && elide_outer_quotes && quote_string_len) + goto store_c; + + c_and_shell_escape: + if (quoting_style == shell_always_quoting_style + && elide_outer_quotes) + goto force_outer_quoting_style; + /* Fall through. */ + c_escape: + if (backslash_escapes) + { + c = esc; + goto store_escape; + } + break; + + case '{': case '}': /* sometimes special if isolated */ + if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1)) + break; + /* Fall through. */ + case '#': case '~': + if (i != 0) + break; + /* Fall through. */ + case ' ': + case '!': /* special in bash */ + case '"': case '$': case '&': + case '(': case ')': case '*': case ';': + case '<': + case '=': /* sometimes special in 0th or (with "set -k") later args */ + case '>': case '[': + case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ + case '`': case '|': + /* A shell special character. In theory, '$' and '`' could + be the first bytes of multibyte characters, which means + we should check them with mbrtowc, but in practice this + doesn't happen so it's not worth worrying about. */ + if (quoting_style == shell_always_quoting_style + && elide_outer_quotes) + goto force_outer_quoting_style; + break; + + case '\'': + if (quoting_style == shell_always_quoting_style) + { + if (elide_outer_quotes) + goto force_outer_quoting_style; + STORE ('\''); + STORE ('\\'); + STORE ('\''); + pending_shell_escape_end = false; + } + break; + + case '%': case '+': case ',': case '-': case '.': case '/': + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': case ':': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': + case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': + case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': + /* These characters don't cause problems, no matter what the + quoting style is. They cannot start multibyte sequences. + A digit or a special letter would cause trouble if it + appeared at the beginning of quote_string because we'd then + escape by prepending a backslash. However, it's hard to + imagine any locale that would use digits or letters as + quotes, and set_custom_quoting is documented not to accept + them. Also, a digit or a special letter would cause + trouble if it appeared in quote_these_too, but that's also + documented as not accepting them. */ + break; + + default: + /* If we have a multibyte sequence, copy it until we reach + its end, find an error, or come back to the initial shift + state. For C-like styles, if the sequence has + unprintable characters, escape the whole sequence, since + we can't easily escape single characters within it. */ + { + /* Length of multibyte sequence found so far. */ + size_t m; + + bool printable; + + if (unibyte_locale) + { + m = 1; + printable = isprint (c) != 0; + } + else + { + mbstate_t mbstate; + memset (&mbstate, 0, sizeof mbstate); + + m = 0; + printable = true; + if (argsize == SIZE_MAX) + argsize = strlen (arg); + + do + { + wchar_t w; + size_t bytes = mbrtowc (&w, &arg[i + m], + argsize - (i + m), &mbstate); + if (bytes == 0) + break; + else if (bytes == (size_t) -1) + { + printable = false; + break; + } + else if (bytes == (size_t) -2) + { + printable = false; + while (i + m < argsize && arg[i + m]) + m++; + break; + } + else + { + /* Work around a bug with older shells that "see" a '\' + that is really the 2nd byte of a multibyte character. + In practice the problem is limited to ASCII + chars >= '@' that are shell special chars. */ + if ('[' == 0x5b && elide_outer_quotes + && quoting_style == shell_always_quoting_style) + { + size_t j; + for (j = 1; j < bytes; j++) + switch (arg[i + m + j]) + { + case '[': case '\\': case '^': + case '`': case '|': + goto force_outer_quoting_style; + + default: + break; + } + } + + if (! iswprint (w)) + printable = false; + m += bytes; + } + } + while (! mbsinit (&mbstate)); + } + + if (1 < m || (backslash_escapes && ! printable)) + { + /* Output a multibyte sequence, or an escaped + unprintable unibyte character. */ + size_t ilim = i + m; + + for (;;) + { + if (backslash_escapes && ! printable) + { + START_ESC (); + STORE ('0' + (c >> 6)); + STORE ('0' + ((c >> 3) & 7)); + c = '0' + (c & 7); + } + else if (is_right_quote) + { + STORE ('\\'); + is_right_quote = false; + } + if (ilim <= i + 1) + break; + END_ESC (); + STORE (c); + c = arg[++i]; + } + + goto store_c; + } + } + } + + if (! (((backslash_escapes && quoting_style != shell_always_quoting_style) + || elide_outer_quotes) + && quote_these_too + && quote_these_too[c / INT_BITS] >> (c % INT_BITS) & 1) + && !is_right_quote) + goto store_c; store_escape: - STORE ('\\'); + START_ESC (); store_c: + END_ESC (); STORE (c); } - if (i == 0 && quoting_style == shell_quoting_style) - goto use_shell_always_quoting_style; + if (len == 0 && quoting_style == shell_always_quoting_style + && elide_outer_quotes) + goto force_outer_quoting_style; - if (quote_string) + if (quote_string && !elide_outer_quotes) for (; *quote_string; quote_string++) STORE (*quote_string); @@ -503,9 +703,15 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, buffer[len] = '\0'; return len; - use_shell_always_quoting_style: + force_outer_quoting_style: + /* Don't reuse quote_these_too, since the addition of outer quotes + sufficiently quotes the specified characters. */ + if (quoting_style == shell_always_quoting_style && backslash_escapes) + quoting_style = shell_escape_always_quoting_style; return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, - shell_always_quoting_style, o); + quoting_style, + flags & ~QA_ELIDE_OUTER_QUOTES, NULL, + left_quote, right_quote); } /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of @@ -519,28 +725,51 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize, ARGSIZE. */ size_t quotearg_buffer (char *buffer, size_t buffersize, - char const *arg, size_t argsize, - struct quoting_options const *o) + char const *arg, size_t argsize, + struct quoting_options const *o) { struct quoting_options const *p = o ? o : &default_quoting_options; int e = errno; size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize, - p->style, p); + p->style, p->flags, p->quote_these_too, + p->left_quote, p->right_quote); errno = e; return r; } -/* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly - allocated storage containing the quoted string. */ +/* Equivalent to quotearg_alloc (ARG, ARGSIZE, NULL, O). */ char * quotearg_alloc (char const *arg, size_t argsize, - struct quoting_options const *o) + struct quoting_options const *o) +{ + return quotearg_alloc_mem (arg, argsize, NULL, o); +} + +/* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly + allocated storage containing the quoted string, and store the + resulting size into *SIZE, if non-NULL. The result can contain + embedded null bytes only if ARGSIZE is not SIZE_MAX, SIZE is not + NULL, and set_quoting_flags has not set the null byte elision + flag. */ +char * +quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size, + struct quoting_options const *o) { + struct quoting_options const *p = o ? o : &default_quoting_options; int e = errno; - size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1; + /* Elide embedded null bytes if we can't return a size. */ + int flags = p->flags | (size ? 0 : QA_ELIDE_NULL_BYTES); + size_t bufsize = quotearg_buffer_restyled (0, 0, arg, argsize, p->style, + flags, p->quote_these_too, + p->left_quote, + p->right_quote) + 1; char *buf = xcharalloc (bufsize); - quotearg_buffer (buf, bufsize, arg, argsize, o); + quotearg_buffer_restyled (buf, bufsize, arg, argsize, p->style, flags, + p->quote_these_too, + p->left_quote, p->right_quote); errno = e; + if (size) + *size = bufsize - 1; return buf; } @@ -589,7 +818,7 @@ quotearg_free (void) to allow for future extensions (using negative values). */ static char * quotearg_n_options (int n, char const *arg, size_t argsize, - struct quoting_options const *options) + struct quoting_options const *options) { int e = errno; @@ -601,20 +830,20 @@ quotearg_n_options (int n, char const *arg, size_t argsize, if (nslots <= n0) { - /* FIXME: technically, the type of n1 should be `unsigned int', - but that evokes an unsuppressible warning from gcc-4.0.1 and - older. If gcc ever provides an option to suppress that warning, - revert to the original type, so that the test in xalloc_oversized - is once again performed only at compile time. */ + /* FIXME: technically, the type of n1 should be 'unsigned int', + but that evokes an unsuppressible warning from gcc-4.0.1 and + older. If gcc ever provides an option to suppress that warning, + revert to the original type, so that the test in xalloc_oversized + is once again performed only at compile time. */ size_t n1 = n0 + 1; bool preallocated = (sv == &slotvec0); if (xalloc_oversized (n1, sizeof *sv)) - xalloc_die (); + xalloc_die (); slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv); if (preallocated) - *sv = slotvec0; + *sv = slotvec0; memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv); nslots = n1; } @@ -622,15 +851,24 @@ quotearg_n_options (int n, char const *arg, size_t argsize, { size_t size = sv[n].size; char *val = sv[n].val; - size_t qsize = quotearg_buffer (val, size, arg, argsize, options); + /* Elide embedded null bytes since we don't return a size. */ + int flags = options->flags | QA_ELIDE_NULL_BYTES; + size_t qsize = quotearg_buffer_restyled (val, size, arg, argsize, + options->style, flags, + options->quote_these_too, + options->left_quote, + options->right_quote); if (size <= qsize) { - sv[n].size = size = qsize + 1; - if (val != slot0) - free (val); - sv[n].val = val = xcharalloc (size); - quotearg_buffer (val, size, arg, argsize, options); + sv[n].size = size = qsize + 1; + if (val != slot0) + free (val); + sv[n].val = val = xcharalloc (size); + quotearg_buffer_restyled (val, size, arg, argsize, options->style, + flags, options->quote_these_too, + options->left_quote, + options->right_quote); } errno = e; @@ -645,19 +883,21 @@ quotearg_n (int n, char const *arg) } char * +quotearg_n_mem (int n, char const *arg, size_t argsize) +{ + return quotearg_n_options (n, arg, argsize, &default_quoting_options); +} + +char * quotearg (char const *arg) { return quotearg_n (0, arg); } -/* Return quoting options for STYLE, with no extra quoting. */ -static struct quoting_options -quoting_options_from_style (enum quoting_style style) +char * +quotearg_mem (char const *arg, size_t argsize) { - struct quoting_options o; - o.style = style; - memset (o.quote_these_too, 0, sizeof o.quote_these_too); - return o; + return quotearg_n_mem (0, arg, argsize); } char * @@ -669,7 +909,7 @@ quotearg_n_style (int n, enum quoting_style s, char const *arg) char * quotearg_n_style_mem (int n, enum quoting_style s, - char const *arg, size_t argsize) + char const *arg, size_t argsize) { struct quoting_options const o = quoting_options_from_style (s); return quotearg_n_options (n, arg, argsize, &o); @@ -682,12 +922,24 @@ quotearg_style (enum quoting_style s, char const *arg) } char * -quotearg_char (char const *arg, char ch) +quotearg_style_mem (enum quoting_style s, char const *arg, size_t argsize) +{ + return quotearg_n_style_mem (0, s, arg, argsize); +} + +char * +quotearg_char_mem (char const *arg, size_t argsize, char ch) { struct quoting_options options; options = default_quoting_options; set_char_quoting (&options, ch, 1); - return quotearg_n_options (0, arg, SIZE_MAX, &options); + return quotearg_n_options (0, arg, argsize, &options); +} + +char * +quotearg_char (char const *arg, char ch) +{ + return quotearg_char_mem (arg, SIZE_MAX, ch); } char * @@ -695,3 +947,85 @@ quotearg_colon (char const *arg) { return quotearg_char (arg, ':'); } + +char * +quotearg_colon_mem (char const *arg, size_t argsize) +{ + return quotearg_char_mem (arg, argsize, ':'); +} + +char * +quotearg_n_style_colon (int n, enum quoting_style s, char const *arg) +{ + struct quoting_options options; + options = quoting_options_from_style (s); + set_char_quoting (&options, ':', 1); + return quotearg_n_options (n, arg, SIZE_MAX, &options); +} + +char * +quotearg_n_custom (int n, char const *left_quote, + char const *right_quote, char const *arg) +{ + return quotearg_n_custom_mem (n, left_quote, right_quote, arg, + SIZE_MAX); +} + +char * +quotearg_n_custom_mem (int n, char const *left_quote, + char const *right_quote, + char const *arg, size_t argsize) +{ + struct quoting_options o = default_quoting_options; + set_custom_quoting (&o, left_quote, right_quote); + return quotearg_n_options (n, arg, argsize, &o); +} + +char * +quotearg_custom (char const *left_quote, char const *right_quote, + char const *arg) +{ + return quotearg_n_custom (0, left_quote, right_quote, arg); +} + +char * +quotearg_custom_mem (char const *left_quote, char const *right_quote, + char const *arg, size_t argsize) +{ + return quotearg_n_custom_mem (0, left_quote, right_quote, arg, + argsize); +} + + +/* The quoting option used by the functions of quote.h. */ +struct quoting_options quote_quoting_options = + { + locale_quoting_style, + 0, + { 0 }, + NULL, NULL + }; + +char const * +quote_n_mem (int n, char const *arg, size_t argsize) +{ + return quotearg_n_options (n, arg, argsize, "e_quoting_options); +} + +char const * +quote_mem (char const *arg, size_t argsize) +{ + return quote_n_mem (0, arg, argsize); +} + +char const * +quote_n (int n, char const *arg) +{ + return quote_n_mem (n, arg, SIZE_MAX); +} + +char const * +quote (char const *arg) +{ + return quote_n (0, arg); +} |