From cc969c8dc1cfd1f60b1dab8ee3117172d9d1bb4f Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 20 May 2015 03:36:43 -0400
Subject: t5551: factor out tag creation

One of our tests in t5551 creates a large number of tags,
and jumps through some hoops to do it efficiently. Let's
factor that out into a function so we can make other similar
tests.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5551-http-fetch-smart.sh | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

(limited to 't')
diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
index 6cbc12d9a7..c34e930bb9 100755
--- a/t/t5551-http-fetch-smart.sh
+++ b/t/t5551-http-fetch-smart.sh
@@ -213,27 +213,35 @@ test_expect_success 'cookies stored in http.cookiefile when http.savecookies set
 	test_cmp expect_cookies.txt cookies_tail.txt
 '
 
-test_expect_success EXPENSIVE 'create 50,000 tags in the repo' '
-	(
-	cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
-	for i in `test_seq 50000`
+# create an arbitrary number of tags, numbered from tag-$1 to tag-$2
+create_tags () {
+	rm -f marks &&
+	for i in $(test_seq "$1" "$2")
 	do
-		echo "commit refs/heads/too-many-refs"
-		echo "mark :$i"
-		echo "committer git <git@example.com> $i +0000"
-		echo "data 0"
-		echo "M 644 inline bla.txt"
-		echo "data 4"
-		echo "bla"
+		# don't use here-doc, because it requires a process
+		# per loop iteration
+		echo "commit refs/heads/too-many-refs-$1" &&
+		echo "mark :$i" &&
+		echo "committer git <git@example.com> $i +0000" &&
+		echo "data 0" &&
+		echo "M 644 inline bla.txt" &&
+		echo "data 4" &&
+		echo "bla" &&
 		# make every commit dangling by always
 		# rewinding the branch after each commit
-		echo "reset refs/heads/too-many-refs"
-		echo "from :1"
+		echo "reset refs/heads/too-many-refs-$1" &&
+		echo "from :$1"
 	done | git fast-import --export-marks=marks &&
 
 	# now assign tags to all the dangling commits we created above
 	tag=$(perl -e "print \"bla\" x 30") &&
 	sed -e "s|^:\([^ ]*\) \(.*\)$|\2 refs/tags/$tag-\1|" <marks >>packed-refs
+}
+
+test_expect_success EXPENSIVE 'create 50,000 tags in the repo' '
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
+		create_tags 1 50000
 	)
 '
 
-- 
cgit v1.2.1


From 6bc0cb5176a5e42ca4a74e3558e8f0790ed09bb1 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 20 May 2015 03:37:09 -0400
Subject: http-backend: spool ref negotiation requests to buffer

When http-backend spawns "upload-pack" to do ref
negotiation, it streams the http request body to
upload-pack, which then streams the http response back to the
client as it reads. In theory, git can go full-duplex; the
client can consume our response while it is still sending
the request.  In practice, however, HTTP is a half-duplex
protocol. Even if our client is ready to read and write
simultaneously, we may have other HTTP infrastructure in the
way, including the webserver that spawns our CGI, or any
intermediate proxies.

In at least one documented case[1], this leads to deadlock
when trying a fetch over http. What happens is basically:

  1. Apache proxies the request to the CGI, http-backend.

  2. http-backend gzip-inflates the data and sends
     the result to upload-pack.

  3. upload-pack acts on the data and generates output over
     the pipe back to Apache. Apache isn't reading because
     it's busy writing (step 1).

This works fine most of the time, because the upload-pack
output ends up in a system pipe buffer, and Apache reads
it as soon as it finishes writing. But if both the request
and the response exceed the system pipe buffer size, then we
deadlock (Apache blocks writing to http-backend,
http-backend blocks writing to upload-pack, and upload-pack
blocks writing to Apache).

We need to break the deadlock by spooling either the input
or the output. In this case, it's ideal to spool the input,
because Apache does not start reading either stdout _or_
stderr until we have consumed all of the input. So until we
do so, we cannot even get an error message out to the
client.

The solution is fairly straightforward: we read the request
body into an in-memory buffer in http-backend, freeing up
Apache, and then feed the data ourselves to upload-pack. But
there are a few important things to note:

  1. We limit the in-memory buffer to prevent an obvious
     denial-of-service attack. This is a new hard limit on
     requests, but it's unlikely to come into play. The
     default value is 10MB, which covers even the ridiculous
     100,000-ref negotiation in the included test (that
     actually caps out just over 5MB). But it's configurable
     on the off chance that you don't mind spending some
     extra memory to make even ridiculous requests work.

  2. We must take care only to buffer when we have to. For
     pushes, the incoming packfile may be of arbitrary
     size, and we should connect the input directly to
     receive-pack. There's no deadlock problem here, though,
     because we do not produce any output until the whole
     packfile has been read.

     For upload-pack's initial ref advertisement, we
     similarly do not need to buffer. Even though we may
     generate a lot of output, there is no request body at
     all (i.e., it is a GET, not a POST).

[1] http://article.gmane.org/gmane.comp.version-control.git/269020

Test-adapted-from: Dennis Kaarsemaker <dennis@kaarsemaker.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5551-http-fetch-smart.sh | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 't')

diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
index c34e930bb9..2d8e5a0fbe 100755
--- a/t/t5551-http-fetch-smart.sh
+++ b/t/t5551-http-fetch-smart.sh
@@ -253,5 +253,16 @@ test_expect_success EXPENSIVE 'clone the 50,000 tag repo to check OS command lin
 	)
 '
 
+test_expect_success EXPENSIVE 'http can handle enormous ref negotiation' '
+	git -C too-many-refs fetch -q --tags &&
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
+		create_tags 50001 100000
+	) &&
+	git -C too-many-refs fetch -q --tags &&
+	git -C too-many-refs for-each-ref refs/tags >tags &&
+	test_line_count = 100000 tags
+'
+
 stop_httpd
 test_done
-- 
cgit v1.2.1