diff options
-rw-r--r-- | commit.c | 82 | ||||
-rwxr-xr-x | git-commit.sh | 7 | ||||
-rwxr-xr-x | git-revert.sh | 14 | ||||
-rwxr-xr-x | t/t3901-8859-1.txt | 4 | ||||
-rwxr-xr-x | t/t3901-i18n-patch.sh | 255 | ||||
-rwxr-xr-x | t/t3901-utf8.txt | 4 |
6 files changed, 333 insertions, 33 deletions
@@ -464,20 +464,29 @@ static int get_one_line(const char *msg, unsigned long len) return ret; } +/* High bit set, or ISO-2022-INT */ +static int non_ascii(int ch) +{ + ch = (ch & 0xff); + return ((ch & 0x80) || (ch == 0x1b)); +} + static int is_rfc2047_special(char ch) { - return ((ch & 0x80) || (ch == '=') || (ch == '?') || (ch == '_')); + return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_')); } -static int add_rfc2047(char *buf, const char *line, int len) +static int add_rfc2047(char *buf, const char *line, int len, + const char *encoding) { char *bp = buf; int i, needquote; - static const char q_utf8[] = "=?utf-8?q?"; + char q_encoding[128]; + const char *q_encoding_fmt = "=?%s?q?"; for (i = needquote = 0; !needquote && i < len; i++) { - unsigned ch = line[i]; - if (ch & 0x80) + int ch = line[i]; + if (non_ascii(ch)) needquote++; if ((i + 1 < len) && (ch == '=' && line[i+1] == '?')) @@ -486,8 +495,11 @@ static int add_rfc2047(char *buf, const char *line, int len) if (!needquote) return sprintf(buf, "%.*s", len, line); - memcpy(bp, q_utf8, sizeof(q_utf8)-1); - bp += sizeof(q_utf8)-1; + i = snprintf(q_encoding, sizeof(q_encoding), q_encoding_fmt, encoding); + if (sizeof(q_encoding) < i) + die("Insanely long encoding name %s", encoding); + memcpy(bp, q_encoding, i); + bp += i; for (i = 0; i < len; i++) { unsigned ch = line[i] & 0xFF; if (is_rfc2047_special(ch)) { @@ -505,7 +517,8 @@ static int add_rfc2047(char *buf, const char *line, int len) } static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf, - const char *line, int relative_date) + const char *line, int relative_date, + const char *encoding) { char *date; int namelen; @@ -533,7 +546,8 @@ static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf, filler = ""; strcpy(buf, "From: "); ret = strlen(buf); - ret += add_rfc2047(buf + ret, line, display_name_length); + ret += add_rfc2047(buf + ret, line, display_name_length, + encoding); memcpy(buf + ret, name_tail, 
namelen - display_name_length); ret += namelen - display_name_length; buf[ret++] = '\n'; @@ -668,21 +682,18 @@ static char *replace_encoding_header(char *buf, char *encoding) return buf; } -static char *logmsg_reencode(const struct commit *commit) +static char *logmsg_reencode(const struct commit *commit, + char *output_encoding) { char *encoding; char *out; - char *output_encoding = (git_log_output_encoding - ? git_log_output_encoding - : git_commit_encoding); + char *utf8 = "utf-8"; - if (!output_encoding) - output_encoding = "utf-8"; - else if (!*output_encoding) + if (!*output_encoding) return NULL; encoding = get_header(commit, "encoding"); if (!encoding) - return NULL; + encoding = utf8; if (!strcmp(encoding, output_encoding)) out = strdup(commit->buffer); else @@ -691,7 +702,8 @@ static char *logmsg_reencode(const struct commit *commit) if (out) out = replace_encoding_header(out, output_encoding); - free(encoding); + if (encoding != utf8) + free(encoding); if (!out) return NULL; return out; @@ -711,8 +723,15 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, int parents_shown = 0; const char *msg = commit->buffer; int plain_non_ascii = 0; - char *reencoded = logmsg_reencode(commit); + char *reencoded; + char *encoding; + encoding = (git_log_output_encoding + ? 
git_log_output_encoding + : git_commit_encoding); + if (!encoding) + encoding = "utf-8"; + reencoded = logmsg_reencode(commit, encoding); if (reencoded) msg = reencoded; @@ -738,7 +757,7 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, i + 1 < len && msg[i+1] == '\n') in_body = 1; } - else if (ch & 0x80) { + else if (non_ascii(ch)) { plain_non_ascii = 1; break; } @@ -797,13 +816,15 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, offset += add_user_info("Author", fmt, buf + offset, line + 7, - relative_date); + relative_date, + encoding); if (!memcmp(line, "committer ", 10) && (fmt == CMIT_FMT_FULL || fmt == CMIT_FMT_FULLER)) offset += add_user_info("Commit", fmt, buf + offset, line + 10, - relative_date); + relative_date, + encoding); continue; } @@ -826,7 +847,8 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, int slen = strlen(subject); memcpy(buf + offset, subject, slen); offset += slen; - offset += add_rfc2047(buf + offset, line, linelen); + offset += add_rfc2047(buf + offset, line, linelen, + encoding); } else { memset(buf + offset, ' ', indent); @@ -837,11 +859,17 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, if (fmt == CMIT_FMT_ONELINE) break; if (subject && plain_non_ascii) { - static const char header[] = - "Content-Type: text/plain; charset=UTF-8\n" + int sz; + char header[512]; + const char *header_fmt = + "Content-Type: text/plain; charset=%s\n" "Content-Transfer-Encoding: 8bit\n"; - memcpy(buf + offset, header, sizeof(header)-1); - offset += sizeof(header)-1; + sz = snprintf(header, sizeof(header), header_fmt, + encoding); + if (sizeof(header) < sz) + die("Encoding name %s too long", encoding); + memcpy(buf + offset, header, sz); + offset += sz; } if (after_subject) { int slen = strlen(after_subject); diff --git a/git-commit.sh b/git-commit.sh index 9fdf234b52..e23918cd6c 100755 --- a/git-commit.sh +++ b/git-commit.sh @@ -429,7 +429,9 @@ then fi elif test "$use_commit" != "" then - git-cat-file commit "$use_commit" | sed 
-e '1,/^$/d' + encoding=$(git repo-config i18n.commitencoding || echo UTF-8) + git show -s --pretty=raw --encoding="$encoding" "$use_commit" | + sed -e '1,/^$/d' -e 's/^ //' elif test -f "$GIT_DIR/MERGE_MSG" then cat "$GIT_DIR/MERGE_MSG" @@ -491,7 +493,8 @@ then q } ' - set_author_env=`git-cat-file commit "$use_commit" | + encoding=$(git repo-config i18n.commitencoding || echo UTF-8) + set_author_env=`git show -s --pretty=raw --encoding="$encoding" "$use_commit" | LANG=C LC_ALL=C sed -ne "$pick_author_script"` eval "$set_author_env" export GIT_AUTHOR_NAME diff --git a/git-revert.sh b/git-revert.sh index 224e6540ca..71cbcbc2b8 100755 --- a/git-revert.sh +++ b/git-revert.sh @@ -81,6 +81,8 @@ prev=$(git-rev-parse --verify "$commit^1" 2>/dev/null) || git-rev-parse --verify "$commit^2" >/dev/null 2>&1 && die "Cannot run $me a multi-parent commit." +encoding=$(git repo-config i18n.commitencoding || echo UTF-8) + # "commit" is an existing commit. We would want to apply # the difference it introduces since its first parent "prev" # on top of the current HEAD if we are cherry-pick. Or the @@ -88,10 +90,11 @@ git-rev-parse --verify "$commit^2" >/dev/null 2>&1 && case "$me" in revert) - git-rev-list --pretty=oneline --max-count=1 $commit | + git show -s --pretty=oneline --encoding="$encoding" $commit | sed -e ' s/^[^ ]* /Revert "/ - s/$/"/' + s/$/"/ + ' echo echo "This reverts commit $commit." 
test "$rev" = "$commit" || @@ -120,14 +123,17 @@ cherry-pick) q }' - set_author_env=`git-cat-file commit "$commit" | + + logmsg=`git show -s --pretty=raw --encoding="$encoding" "$commit"` + set_author_env=`echo "$logmsg" | LANG=C LC_ALL=C sed -ne "$pick_author_script"` eval "$set_author_env" export GIT_AUTHOR_NAME export GIT_AUTHOR_EMAIL export GIT_AUTHOR_DATE - git-cat-file commit $commit | sed -e '1,/^$/d' + echo "$logmsg" | + sed -e '1,/^$/d' -e 's/^ //' case "$replay" in '') echo "(cherry picked from commit $commit)" diff --git a/t/t3901-8859-1.txt b/t/t3901-8859-1.txt new file mode 100755 index 0000000000..38c21a6a7f --- /dev/null +++ b/t/t3901-8859-1.txt @@ -0,0 +1,4 @@ +: to be sourced in t3901 -- this is latin-1 +GIT_AUTHOR_NAME="Áéí óú" && +GIT_COMMITTER_NAME=$GIT_AUTHOR_NAME && +export GIT_AUTHOR_NAME GIT_COMMITTER_NAME diff --git a/t/t3901-i18n-patch.sh b/t/t3901-i18n-patch.sh new file mode 100755 index 0000000000..eda0e2d729 --- /dev/null +++ b/t/t3901-i18n-patch.sh @@ -0,0 +1,255 @@ +#!/bin/sh +# +# Copyright (c) 2006 Junio C Hamano +# + +test_description='i18n settings and format-patch | am pipe' + +. ./test-lib.sh + +check_encoding () { + # Make sure characters are not corrupted + cnt="$1" header="$2" i=1 j=0 bad=0 + while test "$i" -le $cnt + do + git format-patch --encoding=UTF-8 --stdout HEAD~$i..HEAD~$j | + grep "^From: =?UTF-8?q?=C3=81=C3=A9=C3=AD_=C3=B3=C3=BA?=" && + git-cat-file commit HEAD~$j | + case "$header" in + 8859) + grep "^encoding ISO-8859-1" ;; + *) + ! grep "^encoding ISO-8859-1" ;; + esac || { + bad=1 + break + } + j=$i + i=$(($i+1)) + done + (exit $bad) +} + +test_expect_success setup ' + git-repo-config i18n.commitencoding UTF-8 && + + # use UTF-8 in author and committer name to match the + # i18n.commitencoding settings + . 
../t3901-utf8.txt && + + test_tick && + echo "$GIT_AUTHOR_NAME" >mine && + git add mine && + git commit -s -m "Initial commit" && + + test_tick && + echo Hello world >mine && + git add mine && + git commit -s -m "Second on main" && + + # the first commit on the side branch is UTF-8 + test_tick && + git checkout -b side master^ && + echo Another file >yours && + git add yours && + git commit -s -m "Second on side" && + + # the second one on the side branch is ISO-8859-1 + git-repo-config i18n.commitencoding ISO-8859-1 && + # use author and committer name in ISO-8859-1 to match it. + . ../t3901-8859-1.txt && + test_tick && + echo Yet another >theirs && + git add theirs && + git commit -s -m "Third on side" && + + # Back to default + git-repo-config i18n.commitencoding UTF-8 +' + +test_expect_success 'format-patch output (ISO-8859-1)' ' + git-repo-config i18n.logoutputencoding ISO-8859-1 && + + git format-patch --stdout master..HEAD^ >out-l1 && + git format-patch --stdout HEAD^ >out-l2 && + grep "^Content-Type: text/plain; charset=ISO-8859-1" out-l1 && + grep "^From: =?ISO-8859-1?q?=C1=E9=ED_=F3=FA?=" out-l1 && + grep "^Content-Type: text/plain; charset=ISO-8859-1" out-l2 && + grep "^From: =?ISO-8859-1?q?=C1=E9=ED_=F3=FA?=" out-l2 +' + +test_expect_success 'format-patch output (UTF-8)' ' + git repo-config i18n.logoutputencoding UTF-8 && + + git format-patch --stdout master..HEAD^ >out-u1 && + git format-patch --stdout HEAD^ >out-u2 && + grep "^Content-Type: text/plain; charset=UTF-8" out-u1 && + grep "^From: =?UTF-8?q?=C3=81=C3=A9=C3=AD_=C3=B3=C3=BA?=" out-u1 && + grep "^Content-Type: text/plain; charset=UTF-8" out-u2 && + grep "^From: =?UTF-8?q?=C3=81=C3=A9=C3=AD_=C3=B3=C3=BA?=" out-u2 +' + +test_expect_success 'rebase (U/U)' ' + # We want the result of rebase in UTF-8 + git-repo-config i18n.commitencoding UTF-8 && + + # The test is about logoutputencoding not affecting the + # final outcome -- it is used internally to generate the + # patch and the log. 
+ + git repo-config i18n.logoutputencoding UTF-8 && + + # The result will be committed by GIT_COMMITTER_NAME -- + # we want UTF-8 encoded name. + . ../t3901-utf8.txt && + git checkout -b test && + git-rebase master && + + check_encoding 2 +' + +test_expect_success 'rebase (U/L)' ' + git-repo-config i18n.commitencoding UTF-8 && + git repo-config i18n.logoutputencoding ISO-8859-1 && + . ../t3901-utf8.txt && + + git reset --hard side && + git-rebase master && + + check_encoding 2 +' + +test_expect_success 'rebase (L/L)' ' + # In this test we want ISO-8859-1 encoded commits as the result + git-repo-config i18n.commitencoding ISO-8859-1 && + git repo-config i18n.logoutputencoding ISO-8859-1 && + . ../t3901-8859-1.txt && + + git reset --hard side && + git-rebase master && + + check_encoding 2 8859 +' + +test_expect_success 'rebase (L/U)' ' + # This is pathological -- use UTF-8 as intermediate form + # to get ISO-8859-1 results. + git-repo-config i18n.commitencoding ISO-8859-1 && + git repo-config i18n.logoutputencoding UTF-8 && + . ../t3901-8859-1.txt && + + git reset --hard side && + git-rebase master && + + check_encoding 2 8859 +' + +test_expect_success 'cherry-pick(U/U)' ' + # Both the commitencoding and logoutputencoding is set to UTF-8. + + git-repo-config i18n.commitencoding UTF-8 && + git repo-config i18n.logoutputencoding UTF-8 && + . ../t3901-utf8.txt && + + git reset --hard master && + git cherry-pick side^ && + git cherry-pick side && + EDITOR=: VISUAL=: git revert HEAD && + + check_encoding 3 +' + +test_expect_success 'cherry-pick(L/L)' ' + # Both the commitencoding and logoutputencoding is set to ISO-8859-1 + + git-repo-config i18n.commitencoding ISO-8859-1 && + git repo-config i18n.logoutputencoding ISO-8859-1 && + . 
../t3901-8859-1.txt && + + git reset --hard master && + git cherry-pick side^ && + git cherry-pick side && + EDITOR=: VISUAL=: git revert HEAD && + + check_encoding 3 8859 +' + +test_expect_success 'cherry-pick(U/L)' ' + # Commitencoding is set to UTF-8 but logoutputencoding is ISO-8859-1 + + git-repo-config i18n.commitencoding UTF-8 && + git repo-config i18n.logoutputencoding ISO-8859-1 && + . ../t3901-utf8.txt && + + git reset --hard master && + git cherry-pick side^ && + git cherry-pick side && + EDITOR=: VISUAL=: git revert HEAD && + + check_encoding 3 +' + +test_expect_success 'cherry-pick(L/U)' ' + # Again, the commitencoding is set to ISO-8859-1 but + # logoutputencoding is set to UTF-8. + + git-repo-config i18n.commitencoding ISO-8859-1 && + git repo-config i18n.logoutputencoding UTF-8 && + . ../t3901-8859-1.txt && + + git reset --hard master && + git cherry-pick side^ && + git cherry-pick side && + EDITOR=: VISUAL=: git revert HEAD && + + check_encoding 3 8859 +' + +test_expect_success 'rebase --merge (U/U)' ' + git-repo-config i18n.commitencoding UTF-8 && + git repo-config i18n.logoutputencoding UTF-8 && + . ../t3901-utf8.txt && + + git reset --hard side && + git-rebase --merge master && + + check_encoding 2 +' + +test_expect_success 'rebase --merge (U/L)' ' + git-repo-config i18n.commitencoding UTF-8 && + git repo-config i18n.logoutputencoding ISO-8859-1 && + . ../t3901-utf8.txt && + + git reset --hard side && + git-rebase --merge master && + + check_encoding 2 +' + +test_expect_success 'rebase --merge (L/L)' ' + # In this test we want ISO-8859-1 encoded commits as the result + git-repo-config i18n.commitencoding ISO-8859-1 && + git repo-config i18n.logoutputencoding ISO-8859-1 && + . ../t3901-8859-1.txt && + + git reset --hard side && + git-rebase --merge master && + + check_encoding 2 8859 +' + +test_expect_success 'rebase --merge (L/U)' ' + # This is pathological -- use UTF-8 as intermediate form + # to get ISO-8859-1 results. 
+ git-repo-config i18n.commitencoding ISO-8859-1 && + git repo-config i18n.logoutputencoding UTF-8 && + . ../t3901-8859-1.txt && + + git reset --hard side && + git-rebase --merge master && + + check_encoding 2 8859 +' + +test_done diff --git a/t/t3901-utf8.txt b/t/t3901-utf8.txt new file mode 100755 index 0000000000..5f5205cd02 --- /dev/null +++ b/t/t3901-utf8.txt @@ -0,0 +1,4 @@ +: to be sourced in t3901 -- this is utf8 +GIT_AUTHOR_NAME="Áéí óú" && +GIT_COMMITTER_NAME=$GIT_AUTHOR_NAME && +export GIT_AUTHOR_NAME GIT_COMMITTER_NAME |