#include <linux/list.h>
#include <net/tcp.h>
+int sysctl_tcp_max_ssthresh = 0;
+
static DEFINE_SPINLOCK(tcp_cong_list_lock);
static LIST_HEAD(tcp_cong_list);
}
/*
- * Attach new congestion control algorthim to the list
+ * Attach new congestion control algorithm to the list
* of available options.
*/
int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_congestion_ops *ca;
- if (icsk->icsk_ca_ops != &tcp_init_congestion_ops)
- return;
+ /* if no choice made yet assign the current value set as default */
+ if (icsk->icsk_ca_ops == &tcp_init_congestion_ops) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
+ if (try_module_get(ca->owner)) {
+ icsk->icsk_ca_ops = ca;
+ break;
+ }
- rcu_read_lock();
- list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
- if (try_module_get(ca->owner)) {
- icsk->icsk_ca_ops = ca;
- break;
+ /* fallback to next available */
}
-
+ rcu_read_unlock();
}
- rcu_read_unlock();
if (icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
spin_lock(&tcp_cong_list_lock);
ca = tcp_ca_find(name);
#ifdef CONFIG_KMOD
- if (!ca) {
+ if (!ca && capable(CAP_SYS_MODULE)) {
spin_unlock(&tcp_cong_list_lock);
request_module("tcp_%s", name);
#endif
if (ca) {
+ ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */
list_move(&ca->list, &tcp_cong_list);
ret = 0;
}
return ret;
}
+/* Set default value from kernel configuration at bootup */
+static int __init tcp_congestion_default(void)
+{
+ return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
+}
+late_initcall(tcp_congestion_default);
+
+
+/* Build string with list of available congestion control values */
+void tcp_get_available_congestion_control(char *buf, size_t maxlen)
+{
+ struct tcp_congestion_ops *ca;
+ size_t offs = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
+ offs += snprintf(buf + offs, maxlen - offs,
+ "%s%s",
+ offs == 0 ? "" : " ", ca->name);
+
+ }
+ rcu_read_unlock();
+}
+
/* Get current default congestion control */
void tcp_get_default_congestion_control(char *name)
{
rcu_read_unlock();
}
+/* Built list of non-restricted congestion control values */
+void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
+{
+ struct tcp_congestion_ops *ca;
+ size_t offs = 0;
+
+ *buf = '\0';
+ rcu_read_lock();
+ list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
+ if (!(ca->flags & TCP_CONG_NON_RESTRICTED))
+ continue;
+ offs += snprintf(buf + offs, maxlen - offs,
+ "%s%s",
+ offs == 0 ? "" : " ", ca->name);
+
+ }
+ rcu_read_unlock();
+}
+
+/* Change list of non-restricted congestion control */
+int tcp_set_allowed_congestion_control(char *val)
+{
+ struct tcp_congestion_ops *ca;
+ char *clone, *name;
+ int ret = 0;
+
+ clone = kstrdup(val, GFP_USER);
+ if (!clone)
+ return -ENOMEM;
+
+ spin_lock(&tcp_cong_list_lock);
+ /* pass 1 check for bad entries */
+ while ((name = strsep(&clone, " ")) && *name) {
+ ca = tcp_ca_find(name);
+ if (!ca) {
+ ret = -ENOENT;
+ goto out;
+ }
+ }
+
+ /* pass 2 clear old values */
+ list_for_each_entry_rcu(ca, &tcp_cong_list, list)
+ ca->flags &= ~TCP_CONG_NON_RESTRICTED;
+
+ /* pass 3 mark as allowed */
+ while ((name = strsep(&val, " ")) && *name) {
+ ca = tcp_ca_find(name);
+ WARN_ON(!ca);
+ if (ca)
+ ca->flags |= TCP_CONG_NON_RESTRICTED;
+ }
+out:
+ spin_unlock(&tcp_cong_list_lock);
+
+ return ret;
+}
+
+
/* Change congestion control for socket */
int tcp_set_congestion_control(struct sock *sk, const char *name)
{
rcu_read_lock();
ca = tcp_ca_find(name);
+
+ /* no change asking for existing value */
if (ca == icsk->icsk_ca_ops)
goto out;
+#ifdef CONFIG_KMOD
+ /* not found attempt to autoload module */
+ if (!ca && capable(CAP_SYS_MODULE)) {
+ rcu_read_unlock();
+ request_module("tcp_%s", name);
+ rcu_read_lock();
+ ca = tcp_ca_find(name);
+ }
+#endif
if (!ca)
err = -ENOENT;
+ else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN)))
+ err = -EPERM;
+
else if (!try_module_get(ca->owner))
err = -EBUSY;
else {
tcp_cleanup_congestion_control(sk);
icsk->icsk_ca_ops = ca;
- if (icsk->icsk_ca_ops->init)
+
+ if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
}
out:
/*
- * Linear increase during slow start
+ * Slow start (exponential increase) with
+ * RFC3742 Limited Slow Start (fast linear increase) support.
*/
void tcp_slow_start(struct tcp_sock *tp)
{
+ int cnt = 0;
+
if (sysctl_tcp_abc) {
/* RFC3465: Slow Start
* TCP sender SHOULD increase cwnd by the number of
*/
if (tp->bytes_acked < tp->mss_cache)
return;
-
- /* We MAY increase by 2 if discovered delayed ack */
- if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- }
}
+
+ if (sysctl_tcp_max_ssthresh > 0 &&
+ tp->snd_cwnd > sysctl_tcp_max_ssthresh)
+ cnt += sysctl_tcp_max_ssthresh>>1;
+ else
+ cnt += tp->snd_cwnd;
+
+ /* RFC3465: We MAY increase by 2 if discovered delayed ack */
+ if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache)
+ cnt <<= 1;
tp->bytes_acked = 0;
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
+ tp->snd_cwnd_cnt += cnt;
+ while (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+ tp->snd_cwnd_cnt -= tp->snd_cwnd;
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+ }
}
EXPORT_SYMBOL_GPL(tcp_slow_start);
return;
/* In "safe" area, increase. */
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tp->snd_cwnd <= tp->snd_ssthresh)
tcp_slow_start(tp);
- /* In dangerous area, increase slowly. */
+ /* In dangerous area, increase slowly. */
else if (sysctl_tcp_abc) {
- /* RFC3465: Appropriate Byte Count
- * increase once for each full cwnd acked
- */
- if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
- tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- }
- } else {
- /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
- tp->snd_cwnd_cnt = 0;
- } else
- tp->snd_cwnd_cnt++;
- }
+ /* RFC3465: Appropriate Byte Count
+ * increase once for each full cwnd acked
+ */
+ if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
+ tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+ }
+ } else {
+ /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
+ if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+ if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+ tp->snd_cwnd++;
+ tp->snd_cwnd_cnt = 0;
+ } else
+ tp->snd_cwnd_cnt++;
+ }
}
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
struct tcp_congestion_ops tcp_reno = {
+ .flags = TCP_CONG_NON_RESTRICTED,
.name = "reno",
.owner = THIS_MODULE,
.ssthresh = tcp_reno_ssthresh,