~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/tools/testing/selftests/bpf/progs/bpf_dctcp.c

Version: ~ [ linux-6.11-rc3 ] ~ [ linux-6.10.4 ] ~ [ linux-6.9.12 ] ~ [ linux-6.8.12 ] ~ [ linux-6.7.12 ] ~ [ linux-6.6.45 ] ~ [ linux-6.5.13 ] ~ [ linux-6.4.16 ] ~ [ linux-6.3.13 ] ~ [ linux-6.2.16 ] ~ [ linux-6.1.104 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.164 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.223 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.281 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.319 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.336 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0
  2 /* Copyright (c) 2019 Facebook */
  3 
  4 /* WARNING: This implementation is not necessarily the same
  5  * as the tcp_dctcp.c.  The purpose is mainly for testing
  6  * the kernel BPF logic.
  7  */
  8 
  9 #include "bpf_tracing_net.h"
 10 #include <bpf/bpf_helpers.h>
 11 #include <bpf/bpf_tracing.h>
 12 
 13 #ifndef EBUSY
 14 #define EBUSY 16
 15 #endif
 16 #define min(a, b) ((a) < (b) ? (a) : (b))
 17 #define max(a, b) ((a) > (b) ? (a) : (b))
 18 #define min_not_zero(x, y) ({                   \
 19         typeof(x) __x = (x);                    \
 20         typeof(y) __y = (y);                    \
 21         __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
 22 static bool before(__u32 seq1, __u32 seq2)
 23 {
 24         return (__s32)(seq1-seq2) < 0;
 25 }
 26 
/* License string placed in the "license" section. */
char _license[] SEC("license") = "GPL";

/* Congestion-control name that bpf_dctcp_init() switches to when the
 * socket does not have TCP_ECN_OK set.  An empty string
 * (fallback[0] == 0) disables the switch.
 */
volatile const char fallback[TCP_CA_NAME_MAX];
/* Names passed to bpf_setsockopt(TCP_CONGESTION) by bpf_dctcp_init(). */
const char bpf_dctcp[] = "bpf_dctcp";
const char tcp_cdg[] = "cdg";
/* Filled by bpf_getsockopt(TCP_CONGESTION) at the end of the fallback path. */
char cc_res[TCP_CA_NAME_MAX];
/* Return value of the bpf_setsockopt() attempt to select "cdg". */
int tcp_cdg_res = 0;
/* Value read from sk_stg_map in bpf_dctcp_init(), if an entry existed. */
int stg_result = 0;
/* Number of the fallback/bpf_dctcp bpf_setsockopt() calls in
 * bpf_dctcp_init() that returned -EBUSY.
 */
int ebusy_cnt = 0;
 36 
/* Socket storage map with an int value per socket.  bpf_dctcp_init()
 * looks up the entry for the socket, copies it into stg_result and then
 * deletes it.
 */
struct {
        __uint(type, BPF_MAP_TYPE_SK_STORAGE);
        __uint(map_flags, BPF_F_NO_PREALLOC);
        __type(key, int);
        __type(value, int);
} sk_stg_map SEC(".maps");
 43 
/* Upper bound applied to dctcp_alpha wherever it is assigned. */
#define DCTCP_MAX_ALPHA 1024U

/* Per-socket congestion-control state, obtained via inet_csk_ca(sk). */
struct bpf_dctcp {
        /* tp->delivered snapshot taken by dctcp_reset() */
        __u32 old_delivered;
        /* tp->delivered_ce snapshot taken by dctcp_reset() */
        __u32 old_delivered_ce;
        /* tp->rcv_nxt recorded at init and on every ECN CE/no-CE event */
        __u32 prior_rcv_nxt;
        /* Current alpha estimate, never above DCTCP_MAX_ALPHA */
        __u32 dctcp_alpha;
        /* tp->snd_nxt at the last reset; alpha is updated once snd_una
         * is no longer before this value
         */
        __u32 next_seq;
        /* 1 if the last ECN event was CA_EVENT_ECN_IS_CE, else 0 */
        __u32 ce_state;
        /* tp->snd_cwnd saved when reacting to loss / computing ssthresh;
         * used by bpf_dctcp_cwnd_undo()
         */
        __u32 loss_cwnd;
};
 55 
/* Shift used as the gain g in the alpha update of
 * bpf_dctcp_update_alpha().
 */
static unsigned int dctcp_shift_g = 4; /* g = 1/2^4 */
/* Initial alpha assigned in bpf_dctcp_init() (clamped to DCTCP_MAX_ALPHA). */
static unsigned int dctcp_alpha_on_init = DCTCP_MAX_ALPHA;
 58 
 59 static void dctcp_reset(const struct tcp_sock *tp, struct bpf_dctcp *ca)
 60 {
 61         ca->next_seq = tp->snd_nxt;
 62 
 63         ca->old_delivered = tp->delivered;
 64         ca->old_delivered_ce = tp->delivered_ce;
 65 }
 66 
 67 SEC("struct_ops")
 68 void BPF_PROG(bpf_dctcp_init, struct sock *sk)
 69 {
 70         const struct tcp_sock *tp = tcp_sk(sk);
 71         struct bpf_dctcp *ca = inet_csk_ca(sk);
 72         int *stg;
 73 
 74         if (!(tp->ecn_flags & TCP_ECN_OK) && fallback[0]) {
 75                 /* Switch to fallback */
 76                 if (bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
 77                                    (void *)fallback, sizeof(fallback)) == -EBUSY)
 78                         ebusy_cnt++;
 79 
 80                 /* Switch back to myself and the recurred bpf_dctcp_init()
 81                  * will get -EBUSY for all bpf_setsockopt(TCP_CONGESTION),
 82                  * except the last "cdg" one.
 83                  */
 84                 if (bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
 85                                    (void *)bpf_dctcp, sizeof(bpf_dctcp)) == -EBUSY)
 86                         ebusy_cnt++;
 87 
 88                 /* Switch back to fallback */
 89                 if (bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
 90                                    (void *)fallback, sizeof(fallback)) == -EBUSY)
 91                         ebusy_cnt++;
 92 
 93                 /* Expecting -ENOTSUPP for tcp_cdg_res */
 94                 tcp_cdg_res = bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
 95                                              (void *)tcp_cdg, sizeof(tcp_cdg));
 96                 bpf_getsockopt(sk, SOL_TCP, TCP_CONGESTION,
 97                                (void *)cc_res, sizeof(cc_res));
 98                 return;
 99         }
100 
101         ca->prior_rcv_nxt = tp->rcv_nxt;
102         ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
103         ca->loss_cwnd = 0;
104         ca->ce_state = 0;
105 
106         stg = bpf_sk_storage_get(&sk_stg_map, (void *)tp, NULL, 0);
107         if (stg) {
108                 stg_result = *stg;
109                 bpf_sk_storage_delete(&sk_stg_map, (void *)tp);
110         }
111         dctcp_reset(tp, ca);
112 }
113 
114 SEC("struct_ops")
115 __u32 BPF_PROG(bpf_dctcp_ssthresh, struct sock *sk)
116 {
117         struct bpf_dctcp *ca = inet_csk_ca(sk);
118         struct tcp_sock *tp = tcp_sk(sk);
119 
120         ca->loss_cwnd = tp->snd_cwnd;
121         return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
122 }
123 
124 SEC("struct_ops")
125 void BPF_PROG(bpf_dctcp_update_alpha, struct sock *sk, __u32 flags)
126 {
127         const struct tcp_sock *tp = tcp_sk(sk);
128         struct bpf_dctcp *ca = inet_csk_ca(sk);
129 
130         /* Expired RTT */
131         if (!before(tp->snd_una, ca->next_seq)) {
132                 __u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce;
133                 __u32 alpha = ca->dctcp_alpha;
134 
135                 /* alpha = (1 - g) * alpha + g * F */
136 
137                 alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g);
138                 if (delivered_ce) {
139                         __u32 delivered = tp->delivered - ca->old_delivered;
140 
141                         /* If dctcp_shift_g == 1, a 32bit value would overflow
142                          * after 8 M packets.
143                          */
144                         delivered_ce <<= (10 - dctcp_shift_g);
145                         delivered_ce /= max(1U, delivered);
146 
147                         alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA);
148                 }
149                 ca->dctcp_alpha = alpha;
150                 dctcp_reset(tp, ca);
151         }
152 }
153 
154 static void dctcp_react_to_loss(struct sock *sk)
155 {
156         struct bpf_dctcp *ca = inet_csk_ca(sk);
157         struct tcp_sock *tp = tcp_sk(sk);
158 
159         ca->loss_cwnd = tp->snd_cwnd;
160         tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
161 }
162 
163 SEC("struct_ops")
164 void BPF_PROG(bpf_dctcp_state, struct sock *sk, __u8 new_state)
165 {
166         if (new_state == TCP_CA_Recovery &&
167             new_state != BPF_CORE_READ_BITFIELD(inet_csk(sk), icsk_ca_state))
168                 dctcp_react_to_loss(sk);
169         /* We handle RTO in bpf_dctcp_cwnd_event to ensure that we perform only
170          * one loss-adjustment per RTT.
171          */
172 }
173 
174 static void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
175 {
176         struct tcp_sock *tp = tcp_sk(sk);
177 
178         if (ce_state == 1)
179                 tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
180         else
181                 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
182 }
183 
184 /* Minimal DCTP CE state machine:
185  *
186  * S:   0 <- last pkt was non-CE
187  *      1 <- last pkt was CE
188  */
189 static void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
190                                  __u32 *prior_rcv_nxt, __u32 *ce_state)
191 {
192         __u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0;
193 
194         if (*ce_state != new_ce_state) {
195                 /* CE state has changed, force an immediate ACK to
196                  * reflect the new CE state. If an ACK was delayed,
197                  * send that first to reflect the prior CE state.
198                  */
199                 if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
200                         dctcp_ece_ack_cwr(sk, *ce_state);
201                         bpf_tcp_send_ack(sk, *prior_rcv_nxt);
202                 }
203                 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
204         }
205         *prior_rcv_nxt = tcp_sk(sk)->rcv_nxt;
206         *ce_state = new_ce_state;
207         dctcp_ece_ack_cwr(sk, new_ce_state);
208 }
209 
210 SEC("struct_ops")
211 void BPF_PROG(bpf_dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
212 {
213         struct bpf_dctcp *ca = inet_csk_ca(sk);
214 
215         switch (ev) {
216         case CA_EVENT_ECN_IS_CE:
217         case CA_EVENT_ECN_NO_CE:
218                 dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state);
219                 break;
220         case CA_EVENT_LOSS:
221                 dctcp_react_to_loss(sk);
222                 break;
223         default:
224                 /* Don't care for the rest. */
225                 break;
226         }
227 }
228 
229 SEC("struct_ops")
230 __u32 BPF_PROG(bpf_dctcp_cwnd_undo, struct sock *sk)
231 {
232         const struct bpf_dctcp *ca = inet_csk_ca(sk);
233 
234         return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
235 }
236 
/* Kernel symbol (resolved via __ksym) that this program calls directly. */
extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;

/* struct_ops .cong_avoid callback: forwards its arguments unchanged to
 * tcp_reno_cong_avoid().
 */
SEC("struct_ops")
void BPF_PROG(bpf_dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
        tcp_reno_cong_avoid(sk, ack, acked);
}
244 
/* Second tcp_congestion_ops instance, named "bpf_dctcp_nouse", that wires
 * up only .init and .set_state.
 * NOTE(review): presumably never attached by the test, as the name
 * suggests - confirm against the user-space test.
 */
SEC(".struct_ops")
struct tcp_congestion_ops dctcp_nouse = {
        .init           = (void *)bpf_dctcp_init,
        .set_state      = (void *)bpf_dctcp_state,
        .flags          = TCP_CONG_NEEDS_ECN,
        .name           = "bpf_dctcp_nouse",
};
252 
/* The "bpf_dctcp" congestion-control algorithm: binds every callback
 * implemented in this file and sets TCP_CONG_NEEDS_ECN.
 */
SEC(".struct_ops")
struct tcp_congestion_ops dctcp = {
        .init           = (void *)bpf_dctcp_init,
        .in_ack_event   = (void *)bpf_dctcp_update_alpha,
        .cwnd_event     = (void *)bpf_dctcp_cwnd_event,
        .ssthresh       = (void *)bpf_dctcp_ssthresh,
        .cong_avoid     = (void *)bpf_dctcp_cong_avoid,
        .undo_cwnd      = (void *)bpf_dctcp_cwnd_undo,
        .set_state      = (void *)bpf_dctcp_state,
        .flags          = TCP_CONG_NEEDS_ECN,
        .name           = "bpf_dctcp",
};
265 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

sflogo.php