From c54b4b7655447c1f24f6d50779c22eba9ee0fd24 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 14 Mar 2011 07:52:13 +0000
Subject: tcp_cubic: fix comparison of jiffies

Jiffies wraps around therefore the correct way to compare is
to use cast to signed value.

Note: cubic is not using full jiffies value on 64 bit arch
because using full unsigned long makes struct bictcp grow too
large for the available ca_priv area.

Includes correction from Sangtae Ha to improve ack train detection.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net/ipv4/tcp_cubic.c')

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 71d5f2f29fa6..43bb34c9b8bf 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -342,9 +342,11 @@ static void hystart_update(struct sock *sk, u32 delay)
 		u32 curr_jiffies = jiffies;
 
 		/* first detection parameter - ack-train detection */
-		if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) {
+		if ((s32)(curr_jiffies - ca->last_jiffies) <=
+		    msecs_to_jiffies(2)) {
 			ca->last_jiffies = curr_jiffies;
-			if (curr_jiffies - ca->round_start >= ca->delay_min>>4)
+			if ((s32) (curr_jiffies - ca->round_start) >
+			    ca->delay_min >> 4)
 				ca->found |= HYSTART_ACK_TRAIN;
 		}
 
-- 
cgit v1.2.1


From aac46324e12a2bf2e9e0855ad6a287945e34ad39 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 14 Mar 2011 07:52:14 +0000
Subject: tcp_cubic: make ack train delta value a parameter

Make the spacing between ACK's that indicates a train a tuneable
value like other hystart values.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net/ipv4/tcp_cubic.c')

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 43bb34c9b8bf..66d3b00233ec 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -52,6 +52,7 @@ static int tcp_friendliness __read_mostly = 1;
 static int hystart __read_mostly = 1;
 static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
 static int hystart_low_window __read_mostly = 16;
+static int hystart_ack_delta __read_mostly = 2;
 
 static u32 cube_rtt_scale __read_mostly;
 static u32 beta_scale __read_mostly;
@@ -75,6 +76,8 @@ MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms"
 		 " 1: packet-train 2: delay 3: both packet-train and delay");
 module_param(hystart_low_window, int, 0644);
 MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
+module_param(hystart_ack_delta, int, 0644);
+MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)");
 
 /* BIC TCP Parameters */
 struct bictcp {
@@ -343,7 +346,7 @@ static void hystart_update(struct sock *sk, u32 delay)
 
 		/* first detection parameter - ack-train detection */
 		if ((s32)(curr_jiffies - ca->last_jiffies) <=
-		    msecs_to_jiffies(2)) {
+		    msecs_to_jiffies(hystart_ack_delta)) {
 			ca->last_jiffies = curr_jiffies;
 			if ((s32) (curr_jiffies - ca->round_start) >
 			    ca->delay_min >> 4)
-- 
cgit v1.2.1


From 17a6e9f1aa9ba07ca13a1eaf1e631e743af50cca Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 14 Mar 2011 07:52:15 +0000
Subject: tcp_cubic: fix clock dependency

The hystart code was written with assumption that HZ=1000.
Replace the use of jiffies with bictcp_clock as a millisecond
real time clock.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Reported-by: Lucas Nussbaum <lucas.nussbaum@loria.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

(limited to 'net/ipv4/tcp_cubic.c')

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 66d3b00233ec..f4fb2d4b90ad 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -88,7 +88,7 @@ struct bictcp {
 	u32	last_time;	/* time when updated last_cwnd */
 	u32	bic_origin_point;/* origin point of bic function */
 	u32	bic_K;		/* time to origin point from the beginning of the current epoch */
-	u32	delay_min;	/* min delay */
+	u32	delay_min;	/* min delay (msec << 3) */
 	u32	epoch_start;	/* beginning of an epoch */
 	u32	ack_cnt;	/* number of acks */
 	u32	tcp_cwnd;	/* estimated tcp cwnd */
@@ -98,7 +98,7 @@ struct bictcp {
 	u8	found;		/* the exit point is found? */
 	u32	round_start;	/* beginning of each round */
 	u32	end_seq;	/* end_seq of the round */
-	u32	last_jiffies;	/* last time when the ACK spacing is close */
+	u32	last_ack;	/* last time when the ACK spacing is close */
 	u32	curr_rtt;	/* the minimum rtt of current round */
 };
 
@@ -119,12 +119,21 @@ static inline void bictcp_reset(struct bictcp *ca)
 	ca->found = 0;
 }
 
+static inline u32 bictcp_clock(void)
+{
+#if HZ < 1000
+	return ktime_to_ms(ktime_get_real());
+#else
+	return jiffies_to_msecs(jiffies);
+#endif
+}
+
 static inline void bictcp_hystart_reset(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
 
-	ca->round_start = ca->last_jiffies = jiffies;
+	ca->round_start = ca->last_ack = bictcp_clock();
 	ca->end_seq = tp->snd_nxt;
 	ca->curr_rtt = 0;
 	ca->sample_cnt = 0;
@@ -239,8 +248,8 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 	 */
 
 	/* change the unit from HZ to bictcp_HZ */
-	t = ((tcp_time_stamp + (ca->delay_min>>3) - ca->epoch_start)
-	     << BICTCP_HZ) / HZ;
+	t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3)
+	      - ca->epoch_start) << BICTCP_HZ) / HZ;
 
 	if (t < ca->bic_K)		/* t - K */
 		offs = ca->bic_K - t;
@@ -342,14 +351,12 @@ static void hystart_update(struct sock *sk, u32 delay)
 	struct bictcp *ca = inet_csk_ca(sk);
 
 	if (!(ca->found & hystart_detect)) {
-		u32 curr_jiffies = jiffies;
+		u32 now = bictcp_clock();
 
 		/* first detection parameter - ack-train detection */
-		if ((s32)(curr_jiffies - ca->last_jiffies) <=
-		    msecs_to_jiffies(hystart_ack_delta)) {
-			ca->last_jiffies = curr_jiffies;
-			if ((s32) (curr_jiffies - ca->round_start) >
-			    ca->delay_min >> 4)
+		if ((s32)(now - ca->last_ack) <= hystart_ack_delta) {
+			ca->last_ack = now;
+			if ((s32)(now - ca->round_start) > ca->delay_min >> 4)
 				ca->found |= HYSTART_ACK_TRAIN;
 		}
 
@@ -396,7 +403,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
 	if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ)
 		return;
 
-	delay = usecs_to_jiffies(rtt_us) << 3;
+	delay = (rtt_us << 3) / USEC_PER_MSEC;
 	if (delay == 0)
 		delay = 1;
 
-- 
cgit v1.2.1


From 3b585b34493ec9db382d6c325d4ed77b9eb2d2a5 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 14 Mar 2011 07:52:16 +0000
Subject: tcp_cubic: enable high resolution ack time if needed

This is a refined version of an earlier patch by Lucas Nussbaum.
Cubic needs RTT values in milliseconds. If HZ < 1000 then
the values will be too coarse.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Reported-by: Lucas Nussbaum <lucas.nussbaum@loria.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net/ipv4/tcp_cubic.c')

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index f4fb2d4b90ad..5e0491d742ca 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -459,6 +459,10 @@ static int __init cubictcp_register(void)
 	/* divide by bic_scale and by constant Srtt (100ms) */
 	do_div(cube_factor, bic_scale * 10);
 
+	/* hystart needs ms clock resolution */
+	if (hystart && HZ < 1000)
+		cubictcp.flags |= TCP_CONG_RTT_STAMP;
+
 	return tcp_register_congestion_control(&cubictcp);
 }
 
-- 
cgit v1.2.1


From 2b4636a5f8ca547000f6aba24ec1c58f31f4a91d Mon Sep 17 00:00:00 2001
From: Sangtae Ha <sangtae.ha@gmail.com>
Date: Mon, 14 Mar 2011 07:52:17 +0000
Subject: tcp_cubic: make the delay threshold of HyStart less sensitive

Make HyStart less sensitive to abrupt delay variations due to buffer bloat.

Signed-off-by: Sangtae Ha <sangtae.ha@gmail.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Reported-by: Lucas Nussbaum <lucas.nussbaum@loria.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4/tcp_cubic.c')

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 5e0491d742ca..7172c129ff19 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -39,7 +39,7 @@
 
 /* Number of delay samples for detecting the increase of delay */
 #define HYSTART_MIN_SAMPLES	8
-#define HYSTART_DELAY_MIN	(2U<<3)
+#define HYSTART_DELAY_MIN	(4U<<3)
 #define HYSTART_DELAY_MAX	(16U<<3)
 #define HYSTART_DELAY_THRESH(x)	clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
 
-- 
cgit v1.2.1


From b5ccd07337489fa9c9d32e0b628a2168b7953adf Mon Sep 17 00:00:00 2001
From: Sangtae Ha <sangtae.ha@gmail.com>
Date: Mon, 14 Mar 2011 07:52:18 +0000
Subject: tcp_cubic: fix low utilization of CUBIC with HyStart

HyStart sets the initial exit point of slow start.
Suppose that HyStart exits at 0.5BDP in a BDP network and no history exists.
If the BDP of a network is large, CUBIC's initial cwnd growth may be
too conservative to utilize the link.
CUBIC increases the cwnd 20% per RTT in this case.

Signed-off-by: Sangtae Ha <sangtae.ha@gmail.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net/ipv4/tcp_cubic.c')

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 7172c129ff19..90d92dd4cf13 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -270,6 +270,13 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 		ca->cnt = 100 * cwnd;              /* very small increment*/
 	}
 
+	/*
+	 * The initial growth of cubic function may be too conservative
+	 * when the available bandwidth is still unknown.
+	 */
+	if (ca->loss_cwnd == 0 && ca->cnt > 20)
+		ca->cnt = 20;	/* increase cwnd 5% per RTT */
+
 	/* TCP Friendly */
 	if (tcp_friendliness) {
 		u32 scale = beta_scale;
-- 
cgit v1.2.1