diff options
| -rw-r--r-- | Documentation/config.txt | 13 | ||||
| -rw-r--r-- | Documentation/git-cvsserver.txt | 14 | ||||
| -rwxr-xr-x | git-cvsserver.perl | 193 | ||||
| -rwxr-xr-x | t/t9401-git-cvsserver-crlf.sh | 159 | 
4 files changed, 354 insertions, 25 deletions
| diff --git a/Documentation/config.txt b/Documentation/config.txt index 036e61e2f6..917ef5b4fe 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -670,11 +670,14 @@ gitcvs.usecrlfattr  	then 'gitcvs.allbinary' is used. See linkgit:gitattribute[5].  gitcvs.allbinary:: -	If true, all files not otherwise specified using -	'gitcvs.usecrlfattr' and an explicitly set or unset `crlf` -	attribute are sent to the client in mode '-kb'. This -	causes the client to treat them as binary files which -	suppresses any newline munging it otherwise might do. +	This is used if 'gitcvs.usecrlfattr' does not resolve +	the correct '-kb' mode to use. If true, all +	unresolved files are sent to the client in +	mode '-kb'. This causes the client to treat them +	as binary files, which suppresses any newline munging it +	otherwise might do. Alternatively, if it is set to "guess", +	then the contents of the file are examined to decide if +	it is binary, similar to 'core.autocrlf'.  gitcvs.dbname::  	Database used by git-cvsserver to cache revision information diff --git a/Documentation/git-cvsserver.txt b/Documentation/git-cvsserver.txt index 4888b8604d..cd0685ea67 100644 --- a/Documentation/git-cvsserver.txt +++ b/Documentation/git-cvsserver.txt @@ -311,17 +311,23 @@ to crlf conversion on some platforms.  You can make the server use `crlf` attributes to set the '-k' modes  for files by setting the `gitcvs.usecrlfattr` config variable.  In this case, if `crlf` is explicitly unset ('-crlf'), then the -will set '-kb' mode, for binary files.  If it `crlf` is set, +server will set '-kb' mode for binary files. If `crlf` is set,  then the '-k' mode will explicitly be left blank.  See  also linkgit:gitattributes[5] for more information about the `crlf`  attribute.  Alternatively, if `gitcvs.usecrlfattr` config is not enabled  or if the `crlf` attribute is unspecified for a filename, then -the server uses the `gitcvs.allbinary` for the default setting. -If `gitcvs.allbinary` is set, then the files not otherwise +the server uses the `gitcvs.allbinary` config for the default setting. +If `gitcvs.allbinary` is set, then file not otherwise  specified will default to '-kb' mode. Otherwise the '-k' mode -is left blank. +is left blank. But if `gitcvs.allbinary` is set to "guess", then +the correct '-k' mode will be guessed based on the contents of +the file. + +For best consistency with cvs, it is probably best to override the +defaults by setting `gitcvs.usecrlfattr` to true, +and `gitcvs.allbinary` to "guess".  Dependencies  ------------ diff --git a/git-cvsserver.perl b/git-cvsserver.perl index 58206aed7c..920bbe15a3 100755 --- a/git-cvsserver.perl +++ b/git-cvsserver.perl @@ -502,7 +502,7 @@ sub req_add                  print $state->{CVSROOT} . "/$state->{module}/$filename\n";                  # this is an "entries" line -                my $kopts = kopts_from_path($filename); +                my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});                  $log->debug("/$filepart/1.$meta->{revision}//$kopts/");                  print "/$filepart/1.$meta->{revision}//$kopts/\n";                  # permissions @@ -533,7 +533,8 @@ sub req_add          print "Checked-in $dirpart\n";          print "$filename\n"; -        my $kopts = kopts_from_path($filename); +        my $kopts = kopts_from_path($filename,"file", +                        $state->{entries}{$filename}{modified_filename});          print "/$filepart/0//$kopts/\n";          my $requestedKopts = $state->{opt}{k}; @@ -631,7 +632,7 @@ sub req_remove          print "Checked-in $dirpart\n";          print "$filename\n"; -        my $kopts = kopts_from_path($filename); +        my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});          print "/$filepart/-1.$wrev//$kopts/\n";          $rmcount++; @@ -910,7 +911,7 @@ sub req_co         print $state->{CVSROOT} . "/$module/" . ( defined ( $git->{dir} ) and $git->{dir} ne "./" ? $git->{dir} . "/" : "" ) . "$git->{name}\n";          # this is an "entries" line -        my $kopts = kopts_from_path($fullName); +        my $kopts = kopts_from_path($fullName,"sha1",$git->{filehash});          print "/$git->{name}/1.$git->{revision}//$kopts/\n";          # permissions          print "u=$git->{mode},g=$git->{mode},o=$git->{mode}\n"; @@ -1119,7 +1120,7 @@ sub req_update  		print $state->{CVSROOT} . "/$state->{module}/$filename\n";  		# this is an "entries" line -		my $kopts = kopts_from_path($filename); +		my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});  		$log->debug("/$filepart/1.$meta->{revision}//$kopts/");  		print "/$filepart/1.$meta->{revision}//$kopts/\n"; @@ -1167,7 +1168,8 @@ sub req_update                      print "Merged $dirpart\n";                      $log->debug($state->{CVSROOT} . "/$state->{module}/$filename");                      print $state->{CVSROOT} . "/$state->{module}/$filename\n"; -                    my $kopts = kopts_from_path("$dirpart/$filepart"); +                    my $kopts = kopts_from_path("$dirpart/$filepart", +                                                "file",$mergedFile);                      $log->debug("/$filepart/1.$meta->{revision}//$kopts/");                      print "/$filepart/1.$meta->{revision}//$kopts/\n";                  } @@ -1183,7 +1185,8 @@ sub req_update                  {                      print "Merged $dirpart\n";                      print $state->{CVSROOT} . "/$state->{module}/$filename\n"; -                    my $kopts = kopts_from_path("$dirpart/$filepart"); +                    my $kopts = kopts_from_path("$dirpart/$filepart", +                                                "file",$mergedFile);                      print "/$filepart/1.$meta->{revision}/+/$kopts/\n";                  }              } @@ -1434,7 +1437,7 @@ sub req_ci              }              print "Checked-in $dirpart\n";              print "$filename\n"; -            my $kopts = kopts_from_path($filename); +            my $kopts = kopts_from_path($filename,"sha1",$meta->{filehash});              print "/$filepart/1.$meta->{revision}//$kopts/\n";          }      } @@ -2312,7 +2315,7 @@ sub cleanupTmpDir  # file should get -kb.  sub kopts_from_path  { -	my ($path) = @_; +    my ($path, $srcType, $name) = @_;      if ( defined ( $cfg->{gitcvs}{usecrlfattr} ) and           $cfg->{gitcvs}{usecrlfattr} =~ /\s*(1|true|yes)\s*$/i ) @@ -2332,15 +2335,55 @@ sub kopts_from_path          }      } -    unless ( defined ( $cfg->{gitcvs}{allbinary} ) and $cfg->{gitcvs}{allbinary} =~ /^\s*(1|true|yes)\s*$/i ) +    if ( defined ( $cfg->{gitcvs}{allbinary} ) )      { -		# Return "" to give no special treatment to any path -		return ""; -    } else { -		# Alternatively, to have all files treated as if they are binary (which -		# is more like git itself), always return the "-kb" option -		return "-kb"; +        if( ($cfg->{gitcvs}{allbinary} =~ /^\s*(1|true|yes)\s*$/i) ) +        { +            return "-kb"; +        } +        elsif( ($cfg->{gitcvs}{allbinary} =~ /^\s*guess\s*$/i) ) +        { +            if( $srcType eq "sha1Or-k" && +                !defined($name) ) +            { +                my ($ret)=$state->{entries}{$path}{options}; +                if( !defined($ret) ) +                { +                    $ret=$state->{opt}{k}; +                    if(defined($ret)) +                    { +                        $ret="-k$ret"; +                    } +                    else +                    { +                        $ret=""; +                    } +                } +                if( ! ($ret=~/^(|-kb|-kkv|-kkvl|-kk|-ko|-kv)$/) ) +                { +                    print "E Bad -k option\n"; +                    $log->warn("Bad -k option: $ret"); +                    die "Error: Bad -k option: $ret\n"; +                } + +                return $ret; +            } +            else +            { +                if( is_binary($srcType,$name) ) +                { +                    $log->debug("... as binary"); +                    return "-kb"; +                } +                else +                { +                    $log->debug("... as text"); +                } +            } +        }      } +    # Return "" to give no special treatment to any path +    return "";  }  sub check_attr @@ -2360,6 +2403,124 @@ sub check_attr      }  } +# This should have the same heuristics as convert.c:is_binary() and related. +# Note that the bare CR test is done by callers in convert.c. +sub is_binary +{ +    my ($srcType,$name) = @_; +    $log->debug("is_binary($srcType,$name)"); + +    # Minimize amount of interpreted code run in the inner per-character +    # loop for large files, by totalling each character value and +    # then analyzing the totals. +    my @counts; +    my $i; +    for($i=0;$i<256;$i++) +    { +        $counts[$i]=0; +    } + +    my $fh = open_blob_or_die($srcType,$name); +    my $line; +    while( defined($line=<$fh>) ) +    { +        # Any '\0' and bare CR are considered binary. +        if( $line =~ /\0|(\r[^\n])/ ) +        { +            close($fh); +            return 1; +        } + +        # Count up each character in the line: +        my $len=length($line); +        for($i=0;$i<$len;$i++) +        { +            $counts[ord(substr($line,$i,1))]++; +        } +    } +    close $fh; + +    # Don't count CR and LF as either printable/nonprintable +    $counts[ord("\n")]=0; +    $counts[ord("\r")]=0; + +    # Categorize individual character count into printable and nonprintable: +    my $printable=0; +    my $nonprintable=0; +    for($i=0;$i<256;$i++) +    { +        if( $i < 32 && +            $i != ord("\b") && +            $i != ord("\t") && +            $i != 033 &&       # ESC +            $i != 014 )        # FF +        { +            $nonprintable+=$counts[$i]; +        } +        elsif( $i==127 )  # DEL +        { +            $nonprintable+=$counts[$i]; +        } +        else +        { +            $printable+=$counts[$i]; +        } +    } + +    return ($printable >> 7) < $nonprintable; +} + +# Returns open file handle.  Possible invocations: +#  - open_blob_or_die("file",$filename); +#  - open_blob_or_die("sha1",$filehash); +sub open_blob_or_die +{ +    my ($srcType,$name) = @_; +    my ($fh); +    if( $srcType eq "file" ) +    { +        if( !open $fh,"<",$name ) +        { +            $log->warn("Unable to open file $name: $!"); +            die "Unable to open file $name: $!\n"; +        } +    } +    elsif( $srcType eq "sha1" || $srcType eq "sha1Or-k" ) +    { +        unless ( defined ( $name ) and $name =~ /^[a-zA-Z0-9]{40}$/ ) +        { +            $log->warn("Need filehash"); +            die "Need filehash\n"; +        } + +        my $type = `git cat-file -t $name`; +        chomp $type; + +        unless ( defined ( $type ) and $type eq "blob" ) +        { +            $log->warn("Invalid type '$type' for '$name'"); +            die ( "Invalid type '$type' (expected 'blob')" ) +        } + +        my $size = `git cat-file -s $name`; +        chomp $size; + +        $log->debug("open_blob_or_die($name) size=$size, type=$type"); + +        unless( open $fh, '-|', "git", "cat-file", "blob", $name ) +        { +            $log->warn("Unable to open sha1 $name"); +            die "Unable to open sha1 $name\n"; +        } +    } +    else +    { +        $log->warn("Unknown type of blob source: $srcType"); +        die "Unknown type of blob source: $srcType\n"; +    } +    return $fh; +} +  # Generate a CVS author name from Git author information, by taking  # the first eight characters of the user part of the email address.  sub cvs_author diff --git a/t/t9401-git-cvsserver-crlf.sh b/t/t9401-git-cvsserver-crlf.sh index b7a779b788..e27a1c5f85 100755 --- a/t/t9401-git-cvsserver-crlf.sh +++ b/t/t9401-git-cvsserver-crlf.sh @@ -175,4 +175,163 @@ test_expect_success 'updating' '      cmp cvswork/binfile.bin tmpExpect1  ' +rm -rf cvswork +test_expect_success 'cvs co (use attributes/guess)' ' +    GIT_DIR="$SERVERDIR" git config gitcvs.allbinary guess && +    GIT_CONFIG="$git_config" cvs -Q co -d cvswork master >cvs.log 2>&1 && +    marked_as cvswork textfile.c "" && +    marked_as cvswork binfile.bin -kb && +    marked_as cvswork .gitattributes "" && +    marked_as cvswork mixedUp.c "" && +    marked_as cvswork/subdir withCr.bin -kb && +    marked_as cvswork/subdir file.h "" && +    marked_as cvswork/subdir unspecified.other "" && +    marked_as cvswork/subdir newfile.bin -kb && +    marked_as cvswork/subdir newfile.c "" +' + +test_expect_success 'setup multi-line files' ' +    ( echo "line 1" && +      echo "line 2" && +      echo "line 3" && +      echo "line 4 with NUL: Q <-" ) | q_to_nul > multiline.c && +    git add multiline.c && +    ( echo "line 1" && +      echo "line 2" && +      echo "line 3" && +      echo "line 4" ) | q_to_nul > multilineTxt.c && +    git add multilineTxt.c && +    git commit -q -m "multiline files" && +    git push gitcvs.git >/dev/null +' + +rm -rf cvswork +test_expect_success 'cvs co (guess)' ' +    GIT_DIR="$SERVERDIR" git config --bool gitcvs.usecrlfattr false && +    GIT_CONFIG="$git_config" cvs -Q co -d cvswork master >cvs.log 2>&1 && +    marked_as cvswork textfile.c "" && +    marked_as cvswork binfile.bin -kb && +    marked_as cvswork .gitattributes "" && +    marked_as cvswork mixedUp.c -kb && +    marked_as cvswork multiline.c -kb && +    marked_as cvswork multilineTxt.c "" && +    marked_as cvswork/subdir withCr.bin -kb && +    marked_as cvswork/subdir file.h "" && +    marked_as cvswork/subdir unspecified.other "" && +    marked_as cvswork/subdir newfile.bin "" && +    marked_as cvswork/subdir newfile.c "" +' + +test_expect_success 'cvs co another copy (guess)' ' +    GIT_CONFIG="$git_config" cvs -Q co -d cvswork2 master >cvs.log 2>&1 && +    marked_as cvswork2 textfile.c "" && +    marked_as cvswork2 binfile.bin -kb && +    marked_as cvswork2 .gitattributes "" && +    marked_as cvswork2 mixedUp.c -kb && +    marked_as cvswork2 multiline.c -kb && +    marked_as cvswork2 multilineTxt.c "" && +    marked_as cvswork2/subdir withCr.bin -kb && +    marked_as cvswork2/subdir file.h "" && +    marked_as cvswork2/subdir unspecified.other "" && +    marked_as cvswork2/subdir newfile.bin "" && +    marked_as cvswork2/subdir newfile.c "" +' + +test_expect_success 'add text (guess)' ' +    cd cvswork && +    echo "simpleText" > simpleText.c && +    GIT_CONFIG="$git_config" cvs -Q add simpleText.c && +    cd .. && +    marked_as cvswork simpleText.c "" +' + +test_expect_success 'add bin (guess)' ' +    cd cvswork && +    echo "simpleBin: NUL: Q <- there" | q_to_nul > simpleBin.bin && +    GIT_CONFIG="$git_config" cvs -Q add simpleBin.bin && +    cd .. && +    marked_as cvswork simpleBin.bin -kb +' + +test_expect_success 'remove files (guess)' ' +    cd cvswork && +    GIT_CONFIG="$git_config" cvs -Q rm -f subdir/file.h && +    cd subdir && +    GIT_CONFIG="$git_config" cvs -Q rm -f withCr.bin && +    cd ../.. && +    marked_as cvswork/subdir withCr.bin -kb && +    marked_as cvswork/subdir file.h "" +' + +test_expect_success 'cvs ci (guess)' ' +    cd cvswork && +    GIT_CONFIG="$git_config" cvs -Q ci -m "add/rm files" >cvs.log 2>&1 && +    cd .. && +    marked_as cvswork textfile.c "" && +    marked_as cvswork binfile.bin -kb && +    marked_as cvswork .gitattributes "" && +    marked_as cvswork mixedUp.c -kb && +    marked_as cvswork multiline.c -kb && +    marked_as cvswork multilineTxt.c "" && +    not_present cvswork/subdir withCr.bin && +    not_present cvswork/subdir file.h && +    marked_as cvswork/subdir unspecified.other "" && +    marked_as cvswork/subdir newfile.bin "" && +    marked_as cvswork/subdir newfile.c "" && +    marked_as cvswork simpleBin.bin -kb && +    marked_as cvswork simpleText.c "" +' + +test_expect_success 'update subdir of other copy (guess)' ' +    cd cvswork2/subdir && +    GIT_CONFIG="$git_config" cvs -Q update && +    cd ../.. && +    marked_as cvswork2 textfile.c "" && +    marked_as cvswork2 binfile.bin -kb && +    marked_as cvswork2 .gitattributes "" && +    marked_as cvswork2 mixedUp.c -kb && +    marked_as cvswork2 multiline.c -kb && +    marked_as cvswork2 multilineTxt.c "" && +    not_present cvswork2/subdir withCr.bin && +    not_present cvswork2/subdir file.h && +    marked_as cvswork2/subdir unspecified.other "" && +    marked_as cvswork2/subdir newfile.bin "" && +    marked_as cvswork2/subdir newfile.c "" && +    not_present cvswork2 simpleBin.bin && +    not_present cvswork2 simpleText.c +' + +echo "starting update/merge" >> "${WORKDIR}/marked.log" +test_expect_success 'update/merge full other copy (guess)' ' +    git pull gitcvs.git master && +    sed "s/3/replaced_3/" < multilineTxt.c > ml.temp && +    mv ml.temp multilineTxt.c && +    git add multilineTxt.c && +    git commit -q -m "modify multiline file" >> "${WORKDIR}/marked.log" && +    git push gitcvs.git >/dev/null && +    cd cvswork2 && +    sed "s/1/replaced_1/" < multilineTxt.c > ml.temp && +    mv ml.temp multilineTxt.c && +    GIT_CONFIG="$git_config" cvs update > cvs.log 2>&1 && +    cd .. && +    marked_as cvswork2 textfile.c "" && +    marked_as cvswork2 binfile.bin -kb && +    marked_as cvswork2 .gitattributes "" && +    marked_as cvswork2 mixedUp.c -kb && +    marked_as cvswork2 multiline.c -kb && +    marked_as cvswork2 multilineTxt.c "" && +    not_present cvswork2/subdir withCr.bin && +    not_present cvswork2/subdir file.h && +    marked_as cvswork2/subdir unspecified.other "" && +    marked_as cvswork2/subdir newfile.bin "" && +    marked_as cvswork2/subdir newfile.c "" && +    marked_as cvswork2 simpleBin.bin -kb && +    marked_as cvswork2 simpleText.c "" && +    echo "line replaced_1" > tmpExpect2 && +    echo "line 2" >> tmpExpect2 && +    echo "line replaced_3" >> tmpExpect2 && +    echo "line 4" | q_to_nul >> tmpExpect2 && +    cmp cvswork2/multilineTxt.c tmpExpect2 +' +  test_done | 
