/*
- * TCP CUBIC: Binary Increase Congestion control for TCP v2.0
+ * TCP CUBIC: Binary Increase Congestion control for TCP v2.1
*
* This is from the implementation of CUBIC TCP in
* Injong Rhee, Lisong Xu.
*/
#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */
-static int fast_convergence = 1;
-static int max_increment = 16;
-static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
-static int initial_ssthresh = 100;
-static int bic_scale = 41;
-static int tcp_friendliness = 1;
+static int fast_convergence __read_mostly = 1;
+static int max_increment __read_mostly = 16; /* divisor in the min_cnt lower bound applied to cnt */
+static int beta __read_mostly = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
+static int initial_ssthresh __read_mostly = 100; /* copied into tcp_sk(sk)->snd_ssthresh */
+static int bic_scale __read_mostly = 41; /* cubic constant: c = bic_scale >> 10 */
+static int tcp_friendliness __read_mostly = 1; /* turn on/off tcp friendliness */
-static u32 cube_rtt_scale;
-static u32 beta_scale;
-static u64 cube_factor;
+static u32 cube_rtt_scale __read_mostly; /* precomputed as bic_scale * 10, i.e. 1024*c/rtt */
+static u32 beta_scale __read_mostly; /* precomputed as 8*(BICTCP_BETA_SCALE+beta)/3/(BICTCP_BETA_SCALE-beta) */
+static u64 cube_factor __read_mostly;
/* Note parameters that are used for precomputing scale factors are read-only */
module_param(fast_convergence, int, 0644);
module_param(tcp_friendliness, int, 0644);
MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
-#include <asm/div64.h>
-
/* BIC TCP Parameters */
struct bictcp {
u32 cnt; /* increase cwnd by 1 after ACKs */
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
-/* 64bit divisor, dividend and result. dynamic precision */
-static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
-{
- u_int32_t d = divisor;
-
- if (divisor > 0xffffffffULL) {
- unsigned int shift = fls(divisor >> 32);
-
- d = divisor >> shift;
- dividend >>= shift;
- }
-
- /* avoid 64 bit division if possible */
- if (dividend >> 32)
- do_div(dividend, d);
- else
- dividend = (uint32_t) dividend / d;
-
- return dividend;
-}
-
-/*
- * calculate the cubic root of x using Newton-Raphson
+/* calculate the cubic root of x using a table lookup followed by one
+ * Newton-Raphson iteration.
+ * Avg err ~= 0.195%
+ *
+ * Takes the 64-bit input a and returns the approximate root as a u32.
*/
static u32 cubic_root(u64 a)
{
- u32 x, x1;
-
- /* Initial estimate is based on:
- * cbrt(x) = exp(log(x) / 3)
+ u32 x, b, shift;
+ /*
+ * cbrt(x) MSB values for x MSB values in [0..63].
+ * Precomputed then refined by hand - Willy Tarreau
+ *
+ * For x in [0..63],
+ * v = cbrt(x << 18) - 1
+ * cbrt(x) = (v[x] + 10) >> 6
+ *
+ * Inputs with fls64(a) < 7 are answered directly from the table;
+ * larger inputs index the table with their leading bits to seed
+ * the single Newton-Raphson step further down.
*/
- x = 1u << (fls64(a)/3);
+ static const u8 v[] = {
+ /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118,
+ /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156,
+ /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179,
+ /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199,
+ /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215,
+ /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229,
+ /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242,
+ /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254,
+ };
+
+ b = fls64(a);
+ if (b < 7) {
+ /* a in [0..63]: direct table lookup, rounded with +35 instead of +10 */
+ return ((u32)v[(u32)a] + 35) >> 6;
+ }
+
+ b = ((b * 84) >> 8) - 1; /* (b * 84) >> 8 approximates b / 3 (84/256 ~= 0.328) */
+ shift = (a >> (b * 3)); /* leading bits of a, used as the table index below */
+
+ x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;
/*
- * Iteration based on:
+ * Newton-Raphson iteration
* 2
* x = ( 2 * x + a / x ) / 3
* k+1 k k
+ *
+ * The divisor used below is x * (x - 1) rather than x * x, and the
+ * final division by 3 is performed as (x * 341) >> 10
+ * (341/1024 ~= 1/3).
+ *
+ * NOTE(review): the file-local div64_64() and the asm/div64.h include
+ * are deleted by this same change; confirm a shared div64_64()
+ * definition is in scope for the call below.
*/
- do {
- x1 = x;
- x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
- } while (abs(x1 - x) > 1);
-
+ x = (2 * x + (u32)div64_64(a, (u64)x * (u64)(x - 1)));
+ x = ((x * 341) >> 10);
return x;
}
}
}
- /* cubic function - calc*/
- /* calculate c * time^3 / rtt,
- * while considering overflow in calculation of time^3
+ /* cubic function - calc*/
+ /* calculate c * time^3 / rtt,
+ * while considering overflow in calculation of time^3
* (so time^3 is done by using 64 bit)
* and without the support of division of 64bit numbers
* (so all divisions are done by using 32 bit)
- * also NOTE the unit of those veriables
- * time = (t - K) / 2^bictcp_HZ
- * c = bic_scale >> 10
+ * also NOTE the unit of those variables
+ * time = (t - K) / 2^bictcp_HZ
+ * c = bic_scale >> 10
* rtt = (srtt >> 3) / HZ
* !!! The following code does not have overflow problems,
* if the cwnd < 1 million packets !!!
- */
+ */
/* change the unit from HZ to bictcp_HZ */
- t = ((tcp_time_stamp + ca->delay_min - ca->epoch_start)
+ t = ((tcp_time_stamp + (ca->delay_min>>3) - ca->epoch_start)
<< BICTCP_HZ) / HZ;
- if (t < ca->bic_K) /* t - K */
+ if (t < ca->bic_K) /* t - K */
offs = ca->bic_K - t;
- else
- offs = t - ca->bic_K;
+ else
+ offs = t - ca->bic_K;
/* c/rtt * (t-K)^3 */
delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
- if (t < ca->bic_K) /* below origin*/
- bic_target = ca->bic_origin_point - delta;
- else /* above origin*/
- bic_target = ca->bic_origin_point + delta;
+ if (t < ca->bic_K) /* below origin*/
+ bic_target = ca->bic_origin_point - delta;
+ else /* above origin*/
+ bic_target = ca->bic_origin_point + delta;
- /* cubic function - calc bictcp_cnt*/
- if (bic_target > cwnd) {
+ /* cubic function - calc bictcp_cnt*/
+ if (bic_target > cwnd) {
ca->cnt = cwnd / (bic_target - cwnd);
- } else {
- ca->cnt = 100 * cwnd; /* very small increment*/
- }
+ } else {
+ ca->cnt = 100 * cwnd; /* very small increment*/
+ }
if (ca->delay_min > 0) {
/* max increment = Smax * rtt / 0.1 */
min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min);
- if (ca->cnt < min_cnt)
+
+ /* use concave growth when the target is above the origin */
+ if (ca->cnt < min_cnt && t >= ca->bic_K)
ca->cnt = min_cnt;
}
- /* slow start and low utilization */
+ /* slow start and low utilization */
if (ca->loss_cwnd == 0) /* could be aggressive in slow start */
ca->cnt = 50;
if (tcp_friendliness) {
u32 scale = beta_scale;
delta = (cwnd * scale) >> 3;
- while (ca->ack_cnt > delta) { /* update tcp cwnd */
- ca->ack_cnt -= delta;
- ca->tcp_cwnd++;
+ while (ca->ack_cnt > delta) { /* update tcp cwnd */
+ ca->ack_cnt -= delta;
+ ca->tcp_cwnd++;
}
if (ca->tcp_cwnd > cwnd){ /* if bic is slower than tcp */
if (ca->cnt > max_cnt)
ca->cnt = max_cnt;
}
- }
+ }
ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack;
if (ca->cnt == 0) /* cannot be zero */
(s32)(tcp_time_stamp - ca->epoch_start) < HZ)
return;
- delay = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+ delay = (tcp_time_stamp - tp->rx_opt.rcv_tsecr)<<3;
if (delay == 0)
delay = 1;
/* Track delayed acknowledgment ratio using sliding window
* ratio = (15*ratio + sample) / 16
*/
-static void bictcp_acked(struct sock *sk, u32 cnt)
+static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta);
- cube_rtt_scale = (bic_scale << 3) / 10; /* 1024*c/rtt */
+ cube_rtt_scale = (bic_scale * 10); /* 1024*c/rtt */
/* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
* so K = cubic_root( (wmax-cwnd)*rtt/c )
MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("CUBIC TCP");
-MODULE_VERSION("2.0");
+MODULE_VERSION("2.1");