1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Management of Tx window, Tx resend, ACKs and out-of-sequence reception 3 * 4 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 10 #include <linux/module.h> 11 #include <linux/circ_buf.h> 12 #include <linux/net.h> 13 #include <linux/skbuff.h> 14 #include <linux/slab.h> 15 #include <linux/udp.h> 16 #include <net/sock.h> 17 #include <net/af_rxrpc.h> 18 #include "ar-internal.h" 19 20 /* 21 * Propose a PING ACK be sent. 22 */ 23 void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial, 24 enum rxrpc_propose_ack_trace why) 25 { 26 ktime_t delay = ms_to_ktime(READ_ONCE(rxrpc_idle_ack_delay)); 27 ktime_t now = ktime_get_real(); 28 ktime_t ping_at = ktime_add(now, delay); 29 30 trace_rxrpc_propose_ack(call, why, RXRPC_ACK_PING, serial); 31 if (ktime_before(ping_at, call->ping_at)) { 32 call->ping_at = ping_at; 33 trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_ping); 34 } 35 } 36 37 /* 38 * Propose a DELAY ACK be sent in the future. 39 */ 40 void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial, 41 enum rxrpc_propose_ack_trace why) 42 { 43 ktime_t now = ktime_get_real(), delay; 44 45 trace_rxrpc_propose_ack(call, why, RXRPC_ACK_DELAY, serial); 46 47 if (call->peer->srtt_us) 48 delay = (call->peer->srtt_us >> 3) * NSEC_PER_USEC; 49 else 50 delay = ms_to_ktime(READ_ONCE(rxrpc_soft_ack_delay)); 51 ktime_add_ms(delay, call->tx_backoff); 52 53 call->delay_ack_at = ktime_add(now, delay); 54 trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_delayed_ack); 55 } 56 57 /* 58 * Handle congestion being detected by the retransmit timeout. 59 */ 60 static void rxrpc_congestion_timeout(struct rxrpc_call *call) 61 { 62 set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags); 63 } 64 65 /* 66 * Perform retransmission of NAK'd and unack'd packets. 
 */
void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
{
	struct rxrpc_ackpacket *ack = NULL;
	struct rxrpc_skb_priv *sp;
	struct rxrpc_txbuf *txb;
	rxrpc_seq_t transmitted = call->tx_transmitted;
	ktime_t next_resend = KTIME_MAX, rto = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC);
	ktime_t resend_at = KTIME_MAX, now, delay;
	bool unacked = false, did_send = false;
	unsigned int i;

	_enter("{%d,%d}", call->acks_hard_ack, call->tx_top);

	now = ktime_get_real();

	/* Nothing queued means nothing to resend. */
	if (list_empty(&call->tx_buffer))
		goto no_resend;

	trace_rxrpc_resend(call, ack_skb);
	txb = list_first_entry(&call->tx_buffer, struct rxrpc_txbuf, call_link);

	/* Scan the soft ACK table without dropping the lock and resend any
	 * explicitly NAK'd packets.
	 */
	if (ack_skb) {
		sp = rxrpc_skb(ack_skb);
		ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);

		for (i = 0; i < sp->ack.nr_acks; i++) {
			rxrpc_seq_t seq;

			/* Odd entries are ACKs; only even (NAK) entries need
			 * consideration here.
			 */
			if (ack->acks[i] & 1)
				continue;
			seq = sp->ack.first_ack + i;
			if (after(txb->seq, transmitted))
				break;
			if (after(txb->seq, seq))
				continue; /* A new hard ACK probably came in */
			/* Advance the cursor to the NAK'd sequence number. */
			list_for_each_entry_from(txb, &call->tx_buffer, call_link) {
				if (txb->seq == seq)
					goto found_txb;
			}
			goto no_further_resend;

		found_txb:
			resend_at = ktime_add(txb->last_sent, rto);
			if (after(txb->serial, call->acks_highest_serial)) {
				if (ktime_after(resend_at, now) &&
				    ktime_before(resend_at, next_resend))
					next_resend = resend_at;
				continue; /* Ack point not yet reached */
			}

			rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked);

			trace_rxrpc_retransmit(call, txb->seq, txb->serial,
					       ktime_sub(resend_at, now));

			txb->flags |= RXRPC_TXBUF_RESENT;
			rxrpc_transmit_one(call, txb);
			did_send = true;
			/* Transmission may take a while; refresh the clock. */
			now = ktime_get_real();

			if (list_is_last(&txb->call_link, &call->tx_buffer))
				goto no_further_resend;
			txb = list_next_entry(txb, call_link);
		}
	}

	/* Fast-forward through the Tx queue to the point the peer says it has
	 * seen.  Anything between the soft-ACK table and that point will get
	 * ACK'd or NACK'd in due course, so don't worry about it here; here we
	 * need to consider retransmitting anything beyond that point.
	 */
	if (after_eq(call->acks_prev_seq, call->tx_transmitted))
		goto no_further_resend;

	list_for_each_entry_from(txb, &call->tx_buffer, call_link) {
		resend_at = ktime_add(txb->last_sent, rto);

		if (before_eq(txb->seq, call->acks_prev_seq))
			continue;
		if (after(txb->seq, call->tx_transmitted))
			break; /* Not transmitted yet */

		if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE &&
		    before(txb->serial, ntohl(ack->serial)))
			goto do_resend; /* Wasn't accounted for by a more recent ping. */

		/* RTO hasn't elapsed yet; remember the earliest deadline. */
		if (ktime_after(resend_at, now)) {
			if (ktime_before(resend_at, next_resend))
				next_resend = resend_at;
			continue;
		}

	do_resend:
		unacked = true;

		txb->flags |= RXRPC_TXBUF_RESENT;
		rxrpc_transmit_one(call, txb);
		did_send = true;
		rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans);
		now = ktime_get_real();
	}

no_further_resend:
no_resend:
	/* NOTE(review): next_resend is accumulated above but the timer below
	 * is set from resend_at (the last per-packet deadline computed) —
	 * confirm whether next_resend was meant to be used here.
	 */
	if (resend_at < KTIME_MAX) {
		delay = rxrpc_get_rto_backoff(call->peer, did_send);
		resend_at = ktime_add(resend_at, delay);
		trace_rxrpc_timer_set(call, resend_at - now, rxrpc_timer_trace_resend_reset);
	}
	call->resend_at = resend_at;

	if (unacked)
		rxrpc_congestion_timeout(call);

	/* If there was nothing that needed retransmission then it's likely
	 * that an ACK got lost somewhere.  Send a ping to find out instead of
	 * retransmitting data.
	 */
	if (!did_send) {
		ktime_t next_ping = ktime_add_us(call->acks_latest_ts,
						 call->peer->srtt_us >> 3);

		if (ktime_sub(next_ping, now) <= 0)
			rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
				       rxrpc_propose_ack_ping_for_0_retrans);
	}

	_leave("");
}

/*
 * Start transmitting the reply to a service.  This cancels the need to ACK the
 * request if we haven't yet done so.
 */
static void rxrpc_begin_service_reply(struct rxrpc_call *call)
{
	rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SEND_REPLY);
	/* The reply itself implicitly acks the request, so cancel any pending
	 * DELAY ACK and its timer.
	 */
	if (call->ackr_reason == RXRPC_ACK_DELAY)
		call->ackr_reason = 0;
	call->delay_ack_at = KTIME_MAX;
	trace_rxrpc_timer_can(call, rxrpc_timer_trace_delayed_ack);
}

/*
 * Close the transmission phase.  After this point there is no more data to be
 * transmitted in the call.
 */
static void rxrpc_close_tx_phase(struct rxrpc_call *call)
{
	_debug("________awaiting reply/ACK__________");

	switch (__rxrpc_call_state(call)) {
	case RXRPC_CALL_CLIENT_SEND_REQUEST:
		rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_REPLY);
		break;
	case RXRPC_CALL_SERVER_SEND_REPLY:
		rxrpc_set_call_state(call, RXRPC_CALL_SERVER_AWAIT_ACK);
		break;
	default:
		break;
	}
}

/*
 * Determine whether there is room in the Tx window for another packet: the
 * window is the hard-ACK point plus the smaller of the peer-advertised window
 * and our congestion window (plus any extra).
 */
static bool rxrpc_tx_window_has_space(struct rxrpc_call *call)
{
	unsigned int winsize = min_t(unsigned int, call->tx_winsize,
				     call->cong_cwnd + call->cong_extra);
	rxrpc_seq_t window = call->acks_hard_ack, wtop = window + winsize;
	rxrpc_seq_t tx_top = call->tx_top;
	int space;

	/* Signed difference copes with sequence-number wrap. */
	space = wtop - tx_top;
	return space > 0;
}

/*
 * Decant some of the sendmsg prepared queue into the transmission buffer.
248 */ 249 static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) 250 { 251 struct rxrpc_txbuf *txb; 252 253 if (!test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { 254 if (list_empty(&call->tx_sendmsg)) 255 return; 256 rxrpc_expose_client_call(call); 257 } 258 259 while ((txb = list_first_entry_or_null(&call->tx_sendmsg, 260 struct rxrpc_txbuf, call_link))) { 261 spin_lock(&call->tx_lock); 262 list_del(&txb->call_link); 263 spin_unlock(&call->tx_lock); 264 265 call->tx_top = txb->seq; 266 list_add_tail(&txb->call_link, &call->tx_buffer); 267 268 if (txb->flags & RXRPC_LAST_PACKET) 269 rxrpc_close_tx_phase(call); 270 271 rxrpc_transmit_one(call, txb); 272 273 if (!rxrpc_tx_window_has_space(call)) 274 break; 275 } 276 } 277 278 static void rxrpc_transmit_some_data(struct rxrpc_call *call) 279 { 280 switch (__rxrpc_call_state(call)) { 281 case RXRPC_CALL_SERVER_ACK_REQUEST: 282 if (list_empty(&call->tx_sendmsg)) 283 return; 284 rxrpc_begin_service_reply(call); 285 fallthrough; 286 287 case RXRPC_CALL_SERVER_SEND_REPLY: 288 case RXRPC_CALL_CLIENT_SEND_REQUEST: 289 if (!rxrpc_tx_window_has_space(call)) 290 return; 291 if (list_empty(&call->tx_sendmsg)) { 292 rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow); 293 return; 294 } 295 rxrpc_decant_prepared_tx(call); 296 break; 297 default: 298 return; 299 } 300 } 301 302 /* 303 * Ping the other end to fill our RTT cache and to retrieve the rwind 304 * and MTU parameters. 305 */ 306 static void rxrpc_send_initial_ping(struct rxrpc_call *call) 307 { 308 if (call->peer->rtt_count < 3 || 309 ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), 310 ktime_get_real())) 311 rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, 312 rxrpc_propose_ack_ping_for_params); 313 } 314 315 /* 316 * Handle retransmission and deferred ACK/abort generation. 
 */
bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
{
	ktime_t now, t;
	bool resend = false;
	s32 abort_code;

	rxrpc_see_call(call, rxrpc_call_see_input);

	_enter("{%d,%s,%lx}",
	       call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)],
	       call->events);

	/* A completed call needs only the cleanup at the "out" label. */
	if (__rxrpc_call_is_complete(call))
		goto out;

	/* Handle abort request locklessly, vs rxrpc_propose_abort(). */
	abort_code = smp_load_acquire(&call->send_abort);
	if (abort_code) {
		rxrpc_abort_call(call, 0, call->send_abort, call->send_abort_err,
				 call->send_abort_why);
		goto out;
	}

	if (skb && skb->mark == RXRPC_SKB_MARK_ERROR)
		goto out;

	if (skb)
		rxrpc_input_call_packet(call, skb);

	/* If we see our async-event poke, check for timeout trippage. */
	now = ktime_get_real();

	/* Expected-Rx timeout: the peer went quiet mid-reception. */
	t = ktime_sub(call->expect_rx_by, now);
	if (t <= 0) {
		trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_expect_rx);
		goto expired;
	}

	/* Request-idle timeout: only fatal while still receiving the request. */
	t = ktime_sub(call->expect_req_by, now);
	if (t <= 0) {
		call->expect_req_by = KTIME_MAX;
		if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST) {
			trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_idle);
			goto expired;
		}
	}

	/* Hard call-lifetime timeout. */
	t = ktime_sub(READ_ONCE(call->expect_term_by), now);
	if (t <= 0) {
		trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_hard);
		goto expired;
	}

	/* Deferred DELAY ACK became due. */
	t = ktime_sub(call->delay_ack_at, now);
	if (t <= 0) {
		trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_delayed_ack);
		call->delay_ack_at = KTIME_MAX;
		rxrpc_send_ACK(call, RXRPC_ACK_DELAY, 0,
			       rxrpc_propose_ack_delayed_ack);
	}

	/* An expected ACK failed to arrive; flag the event for below. */
	t = ktime_sub(call->ack_lost_at, now);
	if (t <= 0) {
		trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_lost_ack);
		call->ack_lost_at = KTIME_MAX;
		set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events);
	}

	/* Keepalive ping became due. */
	t = ktime_sub(call->ping_at, now);
	if (t <= 0) {
		trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_ping);
		call->ping_at = KTIME_MAX;
		rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
			       rxrpc_propose_ack_ping_for_keepalive);
	}

	/* Resend timer tripped; actual resend happens after transmission
	 * below, and only if the state checks there permit it.
	 */
	t = ktime_sub(call->resend_at, now);
	if (t <= 0) {
		trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_resend);
		call->resend_at = KTIME_MAX;
		resend = true;
	}

	rxrpc_transmit_some_data(call);

	/* Transmission may have taken time; re-sample the clock before the
	 * keepalive check.
	 */
	now = ktime_get_real();
	t = ktime_sub(call->keepalive_at, now);
	if (t <= 0) {
		trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_keepalive);
		call->keepalive_at = KTIME_MAX;
		rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
			       rxrpc_propose_ack_ping_for_keepalive);
	}

	if (skb) {
		struct rxrpc_skb_priv *sp = rxrpc_skb(skb);

		if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK)
			rxrpc_congestion_degrade(call);
	}

	if (test_and_clear_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events))
		rxrpc_send_initial_ping(call);

	/* Process events */
	if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
		rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
			       rxrpc_propose_ack_ping_for_lost_ack);

	if (resend &&
	    __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY &&
	    !test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags))
		rxrpc_resend(call, NULL);

	if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
		rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
			       rxrpc_propose_ack_rx_idle);

	/* Too many unacknowledged packets: ping if RTT data is scarce or
	 * stale, otherwise just send an IDLE ACK.
	 */
	if (call->ackr_nr_unacked > 2) {
		if (call->peer->rtt_count < 3)
			rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
				       rxrpc_propose_ack_ping_for_rtt);
		else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
				      ktime_get_real()))
			rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
				       rxrpc_propose_ack_ping_for_old_rtt);
		else
			rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
				       rxrpc_propose_ack_input_data);
	}

	/* Make sure the timer is restarted */
	if (!__rxrpc_call_is_complete(call)) {
		ktime_t next = READ_ONCE(call->expect_term_by), delay;

/* Fold each pending deadline into "next", keeping the earliest. */
#define set(T) { ktime_t _t = (T); if (ktime_before(_t, next)) next = _t; }

		set(call->expect_req_by);
		set(call->expect_rx_by);
		set(call->delay_ack_at);
		set(call->ack_lost_at);
		set(call->resend_at);
		set(call->keepalive_at);
		set(call->ping_at);

		now = ktime_get_real();
		delay = ktime_sub(next, now);
		if (delay <= 0) {
			/* Already due: poke ourselves rather than arming a
			 * zero-length timer.
			 */
			rxrpc_poke_call(call, rxrpc_call_poke_timer_now);
		} else {
			unsigned long nowj = jiffies, delayj, nextj;

			delayj = max(nsecs_to_jiffies(delay), 1);
			nextj = nowj + delayj;
			/* Only ever move the timer earlier. */
			if (time_before(nextj, call->timer.expires) ||
			    !timer_pending(&call->timer)) {
				trace_rxrpc_timer_restart(call, delay, delayj);
				timer_reduce(&call->timer, nextj);
			}
		}
	}

out:
	/* On completion, stop the timer, detach from the connection and
	 * release any crypto state.
	 */
	if (__rxrpc_call_is_complete(call)) {
		del_timer_sync(&call->timer);
		if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
			rxrpc_disconnect_call(call);
		if (call->security)
			call->security->free_call_crypto(call);
	}
	if (call->acks_hard_ack != call->tx_bottom)
		rxrpc_shrink_call_tx_buffer(call);
	_leave("");
	return true;

expired:
	/* If the peer was heard from more recently on the connection than on
	 * this call, treat the timeout as a call reset rather than a plain
	 * timeout.
	 */
	if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
	    (int)call->conn->hi_serial - (int)call->rx_serial > 0) {
		trace_rxrpc_call_reset(call);
		rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET,
				 rxrpc_abort_call_reset);
	} else {
		rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME,
				 rxrpc_abort_call_timeout);
	}
	goto out;
}
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.