From 7fb1011e610a28518959b1d2d48cea17ecc32048 Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Thu, 20 Sep 2007 00:42:14 +0200 Subject: Rework unquote_c_style to work on a strbuf. If the gain is not obvious in the diffstat, the resulting code is more readable, _and_ in checkout-index/update-index we now reuse the same buffer to unquote strings instead of always freeing/mallocing. This also is more coherent with the next patch that reworks quoting functions. The quoting function is also made more efficient scanning for backslashes and treating portions of strings without a backslash at once. Signed-off-by: Pierre Habouzit --- quote.c | 104 +++++++++++++++++++++++++++++++--------------------------------- 1 file changed, 51 insertions(+), 53 deletions(-) (limited to 'quote.c') diff --git a/quote.c b/quote.c index d88bf75159..7771c9c678 100644 --- a/quote.c +++ b/quote.c @@ -239,73 +239,71 @@ int quote_c_style(const char *name, char *outbuf, FILE *outfp, int no_dq) /* * C-style name unquoting. * - * Quoted should point at the opening double quote. Returns - * an allocated memory that holds unquoted name, which the caller - * should free when done. Updates endp pointer to point at - * one past the ending double quote if given. + * Quoted should point at the opening double quote. + * + Returns 0 if it was able to unquote the string properly, and appends the + * result in the strbuf `sb'. + * + Returns -1 in case of error, and doesn't touch the strbuf. Though note + * that this function will allocate memory in the strbuf, so calling + * strbuf_release is mandatory whichever result unquote_c_style returns. + * + * Updates endp pointer to point at one past the ending double quote if given. */ - -char *unquote_c_style(const char *quoted, const char **endp) +int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp) { - const char *sp; - char *name = NULL, *outp = NULL; - int count = 0, ch, ac; - -#undef EMIT -#define EMIT(c) (outp ? (*outp++ = (c)) : (count++)) + size_t oldlen = sb->len, len; + int ch, ac; if (*quoted++ != '"') - return NULL; + return -1; + + for (;;) { + len = strcspn(quoted, "\"\\"); + strbuf_add(sb, quoted, len); + quoted += len; - while (1) { - /* first pass counts and allocates, second pass fills */ - for (sp = quoted; (ch = *sp++) != '"'; ) { - if (ch == '\\') { - switch (ch = *sp++) { - case 'a': ch = '\a'; break; - case 'b': ch = '\b'; break; - case 'f': ch = '\f'; break; - case 'n': ch = '\n'; break; - case 'r': ch = '\r'; break; - case 't': ch = '\t'; break; - case 'v': ch = '\v'; break; - - case '\\': case '"': - break; /* verbatim */ - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - /* octal */ + switch (*quoted++) { + case '"': + if (endp) + *endp = quoted + 1; + return 0; + case '\\': + break; + default: + goto error; + } + + switch ((ch = *quoted++)) { + case 'a': ch = '\a'; break; + case 'b': ch = '\b'; break; + case 'f': ch = '\f'; break; + case 'n': ch = '\n'; break; + case 'r': ch = '\r'; break; + case 't': ch = '\t'; break; + case 'v': ch = '\v'; break; + + case '\\': case '"': + break; /* verbatim */ + + /* octal values with first digit over 4 overflow */ + case '0': case '1': case '2': case '3': ac = ((ch - '0') << 6); - if ((ch = *sp++) < '0' || '7' < ch) - return NULL; + if ((ch = *quoted++) < '0' || '7' < ch) + goto error; ac |= ((ch - '0') << 3); - if ((ch = *sp++) < '0' || '7' < ch) - return NULL; + if ((ch = *quoted++) < '0' || '7' < ch) + goto error; ac |= (ch - '0'); ch = ac; break; default: - return NULL; /* malformed */ - } + goto error; } - EMIT(ch); + strbuf_addch(sb, ch); } - if (name) { - *outp = 0; - if (endp) - *endp = sp; - return name; - } - outp = name = xmalloc(count + 1); - } + error: + strbuf_setlen(sb, oldlen); + return -1; } void write_name_quoted(const char *prefix, int prefix_len, -- cgit v1.2.1 From 663af3422a648e87945e4d8c0cc3e13671f2bbde Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Thu, 20 Sep 2007 00:42:15 +0200 Subject: Full rework of quote_c_style and write_name_quoted. * quote_c_style works on a strbuf instead of a wild buffer. * quote_c_style is now clever enough to not add double quotes if not needed. * write_name_quoted inherits those advantages, but also take a different set of arguments. Now instead of asking for quotes or not, you pass a "terminator". If it's \0 then we assume you don't want to escape, else C escaping is performed. In any case, the terminator is also appended to the stream. It also no longer takes the prefix/prefix_len arguments, as it's seldomly used, and makes some optimizations harder. * write_name_quotedpfx is created to work like write_name_quoted and take the prefix/prefix_len arguments. Thanks to those API changes, diff.c has somehow lost weight, thanks to the removal of functions that were wrappers around the old write_name_quoted trying to give it a semantics like the new one, but performing a lot of allocations for this goal. Now we always write directly to the stream, no intermediate allocation is performed. As a side effect of the refactor in builtin-apply.c, the length of the bar graphs in diffstats are not affected anymore by the fact that the path was clipped. Signed-off-by: Pierre Habouzit --- quote.c | 209 ++++++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 119 insertions(+), 90 deletions(-) (limited to 'quote.c') diff --git a/quote.c b/quote.c index 7771c9c678..b1efe188d4 100644 --- a/quote.c +++ b/quote.c @@ -157,83 +157,143 @@ char *sq_dequote(char *arg) } } +/* 1 means: quote as octal + * 0 means: quote as octal if (quote_path_fully) + * -1 means: never quote + * c: quote as "\\c" + */ +#define X8(x) x, x, x, x, x, x, x, x +#define X16(x) X8(x), X8(x) +static signed char const sq_lookup[256] = { + /* 0 1 2 3 4 5 6 7 */ + /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 'a', + /* 0x08 */ 'b', 't', 'n', 'v', 'f', 'r', 1, 1, + /* 0x10 */ X16(1), + /* 0x20 */ -1, -1, '"', -1, -1, -1, -1, -1, + /* 0x28 */ X16(-1), X16(-1), X16(-1), + /* 0x58 */ -1, -1, -1, -1,'\\', -1, -1, -1, + /* 0x60 */ X16(-1), X8(-1), + /* 0x78 */ -1, -1, -1, -1, -1, -1, -1, 1, + /* 0x80 */ /* set to 0 */ +}; + +static inline int sq_must_quote(char c) { + return sq_lookup[(unsigned char)c] + quote_path_fully > 0; +} + +/* returns the longest prefix not needing a quote up to maxlen if positive. + This stops at the first \0 because it's marked as a character needing an + escape */ +static size_t next_quote_pos(const char *s, ssize_t maxlen) +{ + size_t len; + if (maxlen < 0) { + for (len = 0; !sq_must_quote(s[len]); len++); + } else { + for (len = 0; len < maxlen && !sq_must_quote(s[len]); len++); + } + return len; +} + /* * C-style name quoting. * - * Does one of three things: - * - * (1) if outbuf and outfp are both NULL, inspect the input name and - * counts the number of bytes that are needed to hold c_style - * quoted version of name, counting the double quotes around - * it but not terminating NUL, and returns it. However, if name - * does not need c_style quoting, it returns 0. - * - * (2) if outbuf is not NULL, it must point at a buffer large enough - * to hold the c_style quoted version of name, enclosing double - * quotes, and terminating NUL. Fills outbuf with c_style quoted - * version of name enclosed in double-quote pair. Return value - * is undefined. + * (1) if sb and fp are both NULL, inspect the input name and counts the + * number of bytes that are needed to hold c_style quoted version of name, + * counting the double quotes around it but not terminating NUL, and + * returns it. + * However, if name does not need c_style quoting, it returns 0. * - * (3) if outfp is not NULL, outputs c_style quoted version of name, - * but not enclosed in double-quote pair. Return value is undefined. + * (2) if sb or fp are not NULL, it emits the c_style quoted version + * of name, enclosed with double quotes if asked and needed only. + * Return value is the same as in (1). */ - -static int quote_c_style_counted(const char *name, int namelen, - char *outbuf, FILE *outfp, int no_dq) +static size_t quote_c_style_counted(const char *name, ssize_t maxlen, + struct strbuf *sb, FILE *fp, int no_dq) { #undef EMIT -#define EMIT(c) \ - (outbuf ? (*outbuf++ = (c)) : outfp ? fputc(c, outfp) : (count++)) - -#define EMITQ() EMIT('\\') +#define EMIT(c) \ + do { \ + if (sb) strbuf_addch(sb, (c)); \ + if (fp) fputc((c), fp); \ + count++; \ + } while (0) +#define EMITBUF(s, l) \ + do { \ + if (sb) strbuf_add(sb, (s), (l)); \ + if (fp) fwrite((s), (l), 1, fp); \ + count += (l); \ + } while (0) + + size_t len, count = 0; + const char *p = name; - const char *sp; - unsigned char ch; - int count = 0, needquote = 0; + for (;;) { + int ch; - if (!no_dq) - EMIT('"'); - for (sp = name; sp < name + namelen; sp++) { - ch = *sp; - if (!ch) + len = next_quote_pos(p, maxlen); + if (len == maxlen || !p[len]) break; - if ((ch < ' ') || (ch == '"') || (ch == '\\') || - (quote_path_fully && (ch >= 0177))) { - needquote = 1; - switch (ch) { - case '\a': EMITQ(); ch = 'a'; break; - case '\b': EMITQ(); ch = 'b'; break; - case '\f': EMITQ(); ch = 'f'; break; - case '\n': EMITQ(); ch = 'n'; break; - case '\r': EMITQ(); ch = 'r'; break; - case '\t': EMITQ(); ch = 't'; break; - case '\v': EMITQ(); ch = 'v'; break; - - case '\\': /* fallthru */ - case '"': EMITQ(); break; - default: - /* octal */ - EMITQ(); - EMIT(((ch >> 6) & 03) + '0'); - EMIT(((ch >> 3) & 07) + '0'); - ch = (ch & 07) + '0'; - break; - } + + if (!no_dq && p == name) + EMIT('"'); + + EMITBUF(p, len); + EMIT('\\'); + p += len; + ch = (unsigned char)*p++; + if (sq_lookup[ch] >= ' ') { + EMIT(sq_lookup[ch]); + } else { + EMIT(((ch >> 6) & 03) + '0'); + EMIT(((ch >> 3) & 07) + '0'); + EMIT(((ch >> 0) & 07) + '0'); } - EMIT(ch); } + + EMITBUF(p, len); + if (p == name) /* no ending quote needed */ + return 0; + if (!no_dq) EMIT('"'); - if (outbuf) - *outbuf = 0; + return count; +} - return needquote ? count : 0; +size_t quote_c_style(const char *name, struct strbuf *sb, FILE *fp, int nodq) +{ + return quote_c_style_counted(name, -1, sb, fp, nodq); } -int quote_c_style(const char *name, char *outbuf, FILE *outfp, int no_dq) +void write_name_quoted(const char *name, FILE *fp, int terminator) { - int cnt = strlen(name); - return quote_c_style_counted(name, cnt, outbuf, outfp, no_dq); + if (terminator) { + quote_c_style(name, NULL, fp, 0); + } else { + fputs(name, fp); + } + fputc(terminator, fp); +} + +extern void write_name_quotedpfx(const char *pfx, size_t pfxlen, + const char *name, FILE *fp, int terminator) +{ + int needquote = 0; + + if (terminator) { + needquote = next_quote_pos(pfx, pfxlen) < pfxlen + || name[next_quote_pos(name, -1)]; + } + if (needquote) { + fputc('"', fp); + quote_c_style_counted(pfx, pfxlen, NULL, fp, 1); + quote_c_style(name, NULL, fp, 1); + fputc('"', fp); + } else { + fwrite(pfx, pfxlen, 1, fp); + fputs(name, fp); + } + fputc(terminator, fp); } /* @@ -306,37 +366,6 @@ int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp) return -1; } -void write_name_quoted(const char *prefix, int prefix_len, - const char *name, int quote, FILE *out) -{ - int needquote; - - if (!quote) { - no_quote: - if (prefix_len) - fprintf(out, "%.*s", prefix_len, prefix); - fputs(name, out); - return; - } - - needquote = 0; - if (prefix_len) - needquote = quote_c_style_counted(prefix, prefix_len, - NULL, NULL, 0); - if (!needquote) - needquote = quote_c_style(name, NULL, NULL, 0); - if (needquote) { - fputc('"', out); - if (prefix_len) - quote_c_style_counted(prefix, prefix_len, - NULL, out, 1); - quote_c_style(name, NULL, out, 1); - fputc('"', out); - } - else - goto no_quote; -} - /* quoting as a string literal for other languages */ void perl_quote_print(FILE *stream, const char *src) -- cgit v1.2.1 From 7a33bcbe802080f3a926e93d66b65ff7c5e8c5ed Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Thu, 20 Sep 2007 00:42:13 +0200 Subject: sq_quote_argv and add_to_string rework with strbuf's. * sq_quote_buf is made public, and works on a strbuf. * sq_quote_argv also works on a strbuf. * make sq_quote_argv take a "maxlen" argument to check the buffer won't grow too big. Signed-off-by: Pierre Habouzit Signed-off-by: Junio C Hamano --- quote.c | 94 +++++++++++++++++------------------------------------------------ 1 file changed, 24 insertions(+), 70 deletions(-) (limited to 'quote.c') diff --git a/quote.c b/quote.c index b1efe188d4..800fd88c9a 100644 --- a/quote.c +++ b/quote.c @@ -12,37 +12,31 @@ * a'b ==> a'\''b ==> 'a'\''b' * a!b ==> a'\!'b ==> 'a'\!'b' */ -#undef EMIT -#define EMIT(x) do { if (++len < n) *bp++ = (x); } while(0) - static inline int need_bs_quote(char c) { return (c == '\'' || c == '!'); } -static size_t sq_quote_buf(char *dst, size_t n, const char *src) +void sq_quote_buf(struct strbuf *dst, const char *src) { - char c; - char *bp = dst; - size_t len = 0; - - EMIT('\''); - while ((c = *src++)) { - if (need_bs_quote(c)) { - EMIT('\''); - EMIT('\\'); - EMIT(c); - EMIT('\''); - } else { - EMIT(c); + char *to_free = NULL; + + if (dst->buf == src) + to_free = strbuf_detach(dst); + + strbuf_addch(dst, '\''); + while (*src) { + size_t len = strcspn(src, "'\\"); + strbuf_add(dst, src, len); + src += len; + while (need_bs_quote(*src)) { + strbuf_addstr(dst, "'\\"); + strbuf_addch(dst, *src++); + strbuf_addch(dst, '\''); } } - EMIT('\''); - - if ( n ) - *bp = 0; - - return len; + strbuf_addch(dst, '\''); + free(to_free); } void sq_quote_print(FILE *stream, const char *src) @@ -62,11 +56,10 @@ void sq_quote_print(FILE *stream, const char *src) fputc('\'', stream); } -char *sq_quote_argv(const char** argv, int count) +void sq_quote_argv(struct strbuf *dst, const char** argv, int count, + size_t maxlen) { - char *buf, *to; int i; - size_t len = 0; /* Count argv if needed. */ if (count < 0) { @@ -74,53 +67,14 @@ char *sq_quote_argv(const char** argv, int count) ; /* just counting */ } - /* Special case: no argv. */ - if (!count) - return xcalloc(1,1); - - /* Get destination buffer length. */ - for (i = 0; i < count; i++) - len += sq_quote_buf(NULL, 0, argv[i]) + 1; - - /* Alloc destination buffer. */ - to = buf = xmalloc(len + 1); - /* Copy into destination buffer. */ + strbuf_grow(dst, 32 * count); for (i = 0; i < count; ++i) { - *to++ = ' '; - to += sq_quote_buf(to, len, argv[i]); + strbuf_addch(dst, ' '); + sq_quote_buf(dst, argv[i]); + if (maxlen && dst->len > maxlen) + die("Too many or long arguments"); } - - return buf; -} - -/* - * Append a string to a string buffer, with or without shell quoting. - * Return true if the buffer overflowed. - */ -int add_to_string(char **ptrp, int *sizep, const char *str, int quote) -{ - char *p = *ptrp; - int size = *sizep; - int oc; - int err = 0; - - if (quote) - oc = sq_quote_buf(p, size, str); - else { - oc = strlen(str); - memcpy(p, str, (size <= oc) ? size - 1 : oc); - } - - if (size <= oc) { - err = 1; - oc = size - 1; - } - - *ptrp += oc; - **ptrp = '\0'; - *sizep -= oc; - return err; } char *sq_dequote(char *arg) -- cgit v1.2.1 From b315c5c08139c0d3c1e4867a305334e29da01d07 Mon Sep 17 00:00:00 2001 From: Pierre Habouzit Date: Thu, 27 Sep 2007 12:58:23 +0200 Subject: strbuf change: be sure ->buf is never ever NULL. For that purpose, the ->buf is always initialized with a char * buf living in the strbuf module. It is made a char * so that we can sloppily accept things that perform: sb->buf[0] = '\0', and because you can't pass "" as an initializer for ->buf without making gcc unhappy for very good reasons. strbuf_init/_detach/_grow have been fixed to trust ->alloc and not ->buf anymore. as a consequence strbuf_detach is _mandatory_ to detach a buffer, copying ->buf isn't an option anymore, if ->buf is going to escape from the scope, and eventually be free'd. API changes: * strbuf_setlen now always works, so just make strbuf_reset a convenience macro. * strbuf_detatch takes a size_t* optional argument (meaning it can be NULL) to copy the buffer's len, as it was needed for this refactor to make the code more readable, and working like the callers. Signed-off-by: Pierre Habouzit Signed-off-by: Junio C Hamano --- quote.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'quote.c') diff --git a/quote.c b/quote.c index 800fd88c9a..482be05b7a 100644 --- a/quote.c +++ b/quote.c @@ -22,7 +22,7 @@ void sq_quote_buf(struct strbuf *dst, const char *src) char *to_free = NULL; if (dst->buf == src) - to_free = strbuf_detach(dst); + to_free = strbuf_detach(dst, NULL); strbuf_addch(dst, '\''); while (*src) { -- cgit v1.2.1