diff options
author | Eric Wong <normalperson@yhbt.net> | 2009-05-28 00:56:23 -0700 |
---|---|---|
committer | Eric Wong <normalperson@yhbt.net> | 2009-05-28 00:57:07 -0700 |
commit | b510df8af2c86d441de87fa4be09f786b5411527 (patch) | |
tree | 44ac92ead768529ffefe3f09db23426db2c9a647 | |
parent | 33fd7169ed6658e898b414a66aefaad16b404ec5 (diff) | |
download | git-b510df8af2c86d441de87fa4be09f786b5411527.tar.gz |
git-svn: refuse to dcommit non-UTF-8 messages
...without i18n.commitencoding set in the config.
SVN tries to store all commit messages in UTF-8; however, it is
the job of the clients to enforce this rule. SVN servers
themselves do not always enforce this, allowing clients to
commit malformed UTF-8 messages and break repositories.
So git-svn will enforce this and tell the user to set
i18n.commitencoding when a git commit is not in UTF-8.
Signed-off-by: Eric Wong <normalperson@yhbt.net>
-rwxr-xr-x | git-svn.perl | 17 | ||||
-rwxr-xr-x | t/t9139-git-svn-non-utf8-commitencoding.sh | 47 |
2 files changed, 61 insertions, 3 deletions
diff --git a/git-svn.perl b/git-svn.perl index a70c7d7b2c..33017974d0 100755 --- a/git-svn.perl +++ b/git-svn.perl @@ -1178,16 +1178,27 @@ sub get_commit_entry { } rename $commit_editmsg, $commit_msg or croak $!; { + require Encode; # SVN requires messages to be UTF-8 when entering the repo local $/; open $log_fh, '<', $commit_msg or croak $!; binmode $log_fh; chomp($log_entry{log} = <$log_fh>); - if (my $enc = Git::config('i18n.commitencoding')) { - require Encode; - Encode::from_to($log_entry{log}, $enc, 'UTF-8'); + my $enc = Git::config('i18n.commitencoding') || 'UTF-8'; + my $msg = $log_entry{log}; + + eval { $msg = Encode::decode($enc, $msg, 1) }; + if ($@) { + die "Could not decode as $enc:\n", $msg, + "\nPerhaps you need to set i18n.commitencoding\n"; } + + eval { $msg = Encode::encode('UTF-8', $msg, 1) }; + die "Could not encode as UTF-8:\n$msg\n" if $@; + + $log_entry{log} = $msg; + close $log_fh or croak $!; } unlink $commit_msg; diff --git a/t/t9139-git-svn-non-utf8-commitencoding.sh b/t/t9139-git-svn-non-utf8-commitencoding.sh new file mode 100755 index 0000000000..2b1db97337 --- /dev/null +++ b/t/t9139-git-svn-non-utf8-commitencoding.sh @@ -0,0 +1,47 @@ +#!/bin/sh +# +# Copyright (c) 2009 Eric Wong + +test_description='git svn refuses to dcommit non-UTF8 messages' + +. 
./lib-git-svn.sh + +# ISO-2022-JP can pass for valid UTF-8, so skipping that in this test + +for H in ISO-8859-1 EUCJP +do + test_expect_success "$H setup" ' + mkdir $H && + svn_cmd import -m "$H test" $H "$svnrepo"/$H && + git svn clone "$svnrepo"/$H $H + ' +done + +for H in ISO-8859-1 EUCJP +do + test_expect_success "$H commit on git side" ' + ( + cd $H && + git config i18n.commitencoding $H && + git checkout -b t refs/remotes/git-svn && + echo $H >F && + git add F && + git commit -a -F "$TEST_DIRECTORY"/t3900/$H.txt && + E=$(git cat-file commit HEAD | sed -ne "s/^encoding //p") && + test "z$E" = "z$H" + ) + ' +done + +for H in ISO-8859-1 EUCJP +do + test_expect_success "$H dcommit to svn" ' + ( + cd $H && + git config --unset i18n.commitencoding && + ! git svn dcommit + ) + ' +done + +test_done |