1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * TCP Illinois congestion control. 4 * Home page: 5 * http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html 6 * 7 * The algorithm is described in: 8 * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm 9 * for High-Speed Networks" 10 * http://tamerbasar.csl.illinois.edu/LiuBasarSrikantPerfEvalArtJun2008.pdf 11 * 12 * Implemented from description in paper and ns-2 simulation. 13 * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org> 14 */ 15 16 #include <linux/module.h> 17 #include <linux/skbuff.h> 18 #include <linux/inet_diag.h> 19 #include <asm/div64.h> 20 #include <net/tcp.h> 21 22 #define ALPHA_SHIFT 7 23 #define ALPHA_SCALE (1u<<ALPHA_SHIFT) 24 #define ALPHA_MIN ((3*ALPHA_SCALE)/10) /* ~0.3 */ 25 #define ALPHA_MAX (10*ALPHA_SCALE) /* 10.0 */ 26 #define ALPHA_BASE ALPHA_SCALE /* 1.0 */ 27 #define RTT_MAX (U32_MAX / ALPHA_MAX) /* 3.3 secs */ 28 29 #define BETA_SHIFT 6 30 #define BETA_SCALE (1u<<BETA_SHIFT) 31 #define BETA_MIN (BETA_SCALE/8) /* 0.125 */ 32 #define BETA_MAX (BETA_SCALE/2) /* 0.5 */ 33 #define BETA_BASE BETA_MAX 34 35 static int win_thresh __read_mostly = 15; 36 module_param(win_thresh, int, 0); 37 MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing"); 38 39 static int theta __read_mostly = 5; 40 module_param(theta, int, 0); 41 MODULE_PARM_DESC(theta, "# of fast RTT's before full growth"); 42 43 /* TCP Illinois Parameters */ 44 struct illinois { 45 u64 sum_rtt; /* sum of rtt's measured within last rtt */ 46 u16 cnt_rtt; /* # of rtts measured within last rtt */ 47 u32 base_rtt; /* min of all rtt in usec */ 48 u32 max_rtt; /* max of all rtt in usec */ 49 u32 end_seq; /* right edge of current RTT */ 50 u32 alpha; /* Additive increase */ 51 u32 beta; /* Muliplicative decrease */ 52 u16 acked; /* # packets acked by current ACK */ 53 u8 rtt_above; /* average rtt has gone above threshold */ 54 u8 rtt_low; /* # of rtts measurements below threshold */ 55 }; 56 57 static void rtt_reset(struct sock *sk) 58 { 59 struct tcp_sock *tp = tcp_sk(sk); 60 struct illinois *ca = inet_csk_ca(sk); 61 62 ca->end_seq = tp->snd_nxt; 63 ca->cnt_rtt = 0; 64 ca->sum_rtt = 0; 65 66 /* TODO: age max_rtt? */ 67 } 68 69 static void tcp_illinois_init(struct sock *sk) 70 { 71 struct illinois *ca = inet_csk_ca(sk); 72 73 ca->alpha = ALPHA_MAX; 74 ca->beta = BETA_BASE; 75 ca->base_rtt = 0x7fffffff; 76 ca->max_rtt = 0; 77 78 ca->acked = 0; 79 ca->rtt_low = 0; 80 ca->rtt_above = 0; 81 82 rtt_reset(sk); 83 } 84 85 /* Measure RTT for each ack. */ 86 static void tcp_illinois_acked(struct sock *sk, const struct ack_sample *sample) 87 { 88 struct illinois *ca = inet_csk_ca(sk); 89 s32 rtt_us = sample->rtt_us; 90 91 ca->acked = sample->pkts_acked; 92 93 /* dup ack, no rtt sample */ 94 if (rtt_us < 0) 95 return; 96 97 /* ignore bogus values, this prevents wraparound in alpha math */ 98 if (rtt_us > RTT_MAX) 99 rtt_us = RTT_MAX; 100 101 /* keep track of minimum RTT seen so far */ 102 if (ca->base_rtt > rtt_us) 103 ca->base_rtt = rtt_us; 104 105 /* and max */ 106 if (ca->max_rtt < rtt_us) 107 ca->max_rtt = rtt_us; 108 109 ++ca->cnt_rtt; 110 ca->sum_rtt += rtt_us; 111 } 112 113 /* Maximum queuing delay */ 114 static inline u32 max_delay(const struct illinois *ca) 115 { 116 return ca->max_rtt - ca->base_rtt; 117 } 118 119 /* Average queuing delay */ 120 static inline u32 avg_delay(const struct illinois *ca) 121 { 122 u64 t = ca->sum_rtt; 123 124 do_div(t, ca->cnt_rtt); 125 return t - ca->base_rtt; 126 } 127 128 /* 129 * Compute value of alpha used for additive increase. 130 * If small window then use 1.0, equivalent to Reno. 131 * 132 * For larger windows, adjust based on average delay. 133 * A. If average delay is at minimum (we are uncongested), 134 * then use large alpha (10.0) to increase faster. 135 * B. If average delay is at maximum (getting congested) 136 * then use small alpha (0.3) 137 * 138 * The result is a convex window growth curve. 139 */ 140 static u32 alpha(struct illinois *ca, u32 da, u32 dm) 141 { 142 u32 d1 = dm / 100; /* Low threshold */ 143 144 if (da <= d1) { 145 /* If never got out of low delay zone, then use max */ 146 if (!ca->rtt_above) 147 return ALPHA_MAX; 148 149 /* Wait for 5 good RTT's before allowing alpha to go alpha max. 150 * This prevents one good RTT from causing sudden window increase. 151 */ 152 if (++ca->rtt_low < theta) 153 return ca->alpha; 154 155 ca->rtt_low = 0; 156 ca->rtt_above = 0; 157 return ALPHA_MAX; 158 } 159 160 ca->rtt_above = 1; 161 162 /* 163 * Based on: 164 * 165 * (dm - d1) amin amax 166 * k1 = ------------------- 167 * amax - amin 168 * 169 * (dm - d1) amin 170 * k2 = ---------------- - d1 171 * amax - amin 172 * 173 * k1 174 * alpha = ---------- 175 * k2 + da 176 */ 177 178 dm -= d1; 179 da -= d1; 180 return (dm * ALPHA_MAX) / 181 (dm + (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN); 182 } 183 184 /* 185 * Beta used for multiplicative decrease. 186 * For small window sizes returns same value as Reno (0.5) 187 * 188 * If delay is small (10% of max) then beta = 1/8 189 * If delay is up to 80% of max then beta = 1/2 190 * In between is a linear function 191 */ 192 static u32 beta(u32 da, u32 dm) 193 { 194 u32 d2, d3; 195 196 d2 = dm / 10; 197 if (da <= d2) 198 return BETA_MIN; 199 200 d3 = (8 * dm) / 10; 201 if (da >= d3 || d3 <= d2) 202 return BETA_MAX; 203 204 /* 205 * Based on: 206 * 207 * bmin d3 - bmax d2 208 * k3 = ------------------- 209 * d3 - d2 210 * 211 * bmax - bmin 212 * k4 = ------------- 213 * d3 - d2 214 * 215 * b = k3 + k4 da 216 */ 217 return (BETA_MIN * d3 - BETA_MAX * d2 + (BETA_MAX - BETA_MIN) * da) 218 / (d3 - d2); 219 } 220 221 /* Update alpha and beta values once per RTT */ 222 static void update_params(struct sock *sk) 223 { 224 struct tcp_sock *tp = tcp_sk(sk); 225 struct illinois *ca = inet_csk_ca(sk); 226 227 if (tcp_snd_cwnd(tp) < win_thresh) { 228 ca->alpha = ALPHA_BASE; 229 ca->beta = BETA_BASE; 230 } else if (ca->cnt_rtt > 0) { 231 u32 dm = max_delay(ca); 232 u32 da = avg_delay(ca); 233 234 ca->alpha = alpha(ca, da, dm); 235 ca->beta = beta(da, dm); 236 } 237 238 rtt_reset(sk); 239 } 240 241 /* 242 * In case of loss, reset to default values 243 */ 244 static void tcp_illinois_state(struct sock *sk, u8 new_state) 245 { 246 struct illinois *ca = inet_csk_ca(sk); 247 248 if (new_state == TCP_CA_Loss) { 249 ca->alpha = ALPHA_BASE; 250 ca->beta = BETA_BASE; 251 ca->rtt_low = 0; 252 ca->rtt_above = 0; 253 rtt_reset(sk); 254 } 255 } 256 257 /* 258 * Increase window in response to successful acknowledgment. 259 */ 260 static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked) 261 { 262 struct tcp_sock *tp = tcp_sk(sk); 263 struct illinois *ca = inet_csk_ca(sk); 264 265 if (after(ack, ca->end_seq)) 266 update_params(sk); 267 268 /* RFC2861 only increase cwnd if fully utilized */ 269 if (!tcp_is_cwnd_limited(sk)) 270 return; 271 272 /* In slow start */ 273 if (tcp_in_slow_start(tp)) 274 tcp_slow_start(tp, acked); 275 276 else { 277 u32 delta; 278 279 /* snd_cwnd_cnt is # of packets since last cwnd increment */ 280 tp->snd_cwnd_cnt += ca->acked; 281 ca->acked = 1; 282 283 /* This is close approximation of: 284 * tp->snd_cwnd += alpha/tp->snd_cwnd 285 */ 286 delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT; 287 if (delta >= tcp_snd_cwnd(tp)) { 288 tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp) + delta / tcp_snd_cwnd(tp), 289 (u32)tp->snd_cwnd_clamp)); 290 tp->snd_cwnd_cnt = 0; 291 } 292 } 293 } 294 295 static u32 tcp_illinois_ssthresh(struct sock *sk) 296 { 297 struct tcp_sock *tp = tcp_sk(sk); 298 struct illinois *ca = inet_csk_ca(sk); 299 u32 decr; 300 301 /* Multiplicative decrease */ 302 decr = (tcp_snd_cwnd(tp) * ca->beta) >> BETA_SHIFT; 303 return max(tcp_snd_cwnd(tp) - decr, 2U); 304 } 305 306 /* Extract info for Tcp socket info provided via netlink. */ 307 static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr, 308 union tcp_cc_info *info) 309 { 310 const struct illinois *ca = inet_csk_ca(sk); 311 312 if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { 313 info->vegas.tcpv_enabled = 1; 314 info->vegas.tcpv_rttcnt = ca->cnt_rtt; 315 info->vegas.tcpv_minrtt = ca->base_rtt; 316 info->vegas.tcpv_rtt = 0; 317 318 if (info->vegas.tcpv_rttcnt > 0) { 319 u64 t = ca->sum_rtt; 320 321 do_div(t, info->vegas.tcpv_rttcnt); 322 info->vegas.tcpv_rtt = t; 323 } 324 *attr = INET_DIAG_VEGASINFO; 325 return sizeof(struct tcpvegas_info); 326 } 327 return 0; 328 } 329 330 static struct tcp_congestion_ops tcp_illinois __read_mostly = { 331 .init = tcp_illinois_init, 332 .ssthresh = tcp_illinois_ssthresh, 333 .undo_cwnd = tcp_reno_undo_cwnd, 334 .cong_avoid = tcp_illinois_cong_avoid, 335 .set_state = tcp_illinois_state, 336 .get_info = tcp_illinois_info, 337 .pkts_acked = tcp_illinois_acked, 338 339 .owner = THIS_MODULE, 340 .name = "illinois", 341 }; 342 343 static int __init tcp_illinois_register(void) 344 { 345 BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE); 346 return tcp_register_congestion_control(&tcp_illinois); 347 } 348 349 static void __exit tcp_illinois_unregister(void) 350 { 351 tcp_unregister_congestion_control(&tcp_illinois); 352 } 353 354 module_init(tcp_illinois_register); 355 module_exit(tcp_illinois_unregister); 356 357 MODULE_AUTHOR("Stephen Hemminger, Shao Liu"); 358 MODULE_LICENSE("GPL"); 359 MODULE_DESCRIPTION("TCP Illinois"); 360 MODULE_VERSION("1.0"); 361
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.