// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * ip_vs_app.c: Application module support for IPVS
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *
 * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
 * is that ip_vs_app module handles the reverse direction (incoming requests
 * and outgoing responses).
 *
 *		IP_MASQ_APP application masquerading module
 *
 * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
 */

#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <linux/stat.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>

#include <net/ip_vs.h>

EXPORT_SYMBOL(register_ip_vs_app);
EXPORT_SYMBOL(unregister_ip_vs_app);
EXPORT_SYMBOL(register_ip_vs_app_inc);

/* Serializes all app (and app incarnation) registration/unregistration,
 * and protects walkers of ipvs->app_list (including the /proc seq_file
 * iterator below, which holds it from ->start to ->stop).
 */
static DEFINE_MUTEX(__ip_vs_app_mutex);

/*
 *	Get an ip_vs_app object
 *	Pins the module that owns @app; returns 0 if the module is going away.
 */
static inline int ip_vs_app_get(struct ip_vs_app *app)
{
	return try_module_get(app->module);
}


/* Drop the module reference taken by ip_vs_app_get(). */
static inline void ip_vs_app_put(struct ip_vs_app *app)
{
	module_put(app->module);
}

/* Free an app incarnation and its (possibly NULL) timeout table. */
static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
{
	kfree(inc->timeout_table);
	kfree(inc);
}

/* RCU callback: deferred free once all RCU readers of the inc are done. */
static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
{
	struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head);

	ip_vs_app_inc_destroy(inc);
}

/*
 *	Allocate/initialize app incarnation and register it in proto apps.
 *	Called with __ip_vs_app_mutex held (via register_ip_vs_app_inc).
 *	On success the inc is linked on app->incs_list and registered with
 *	the protocol's app table; on failure everything is freed here.
 */
static int
ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
		  __u16 port)
{
	struct ip_vs_protocol *pp;
	struct ip_vs_app *inc;
	int ret;

	if (!(pp = ip_vs_proto_get(proto)))
		return -EPROTONOSUPPORT;

	/* unregister_app doubles as "this protocol supports apps at all";
	 * without it we could never undo the registration below.
	 */
	if (!pp->unregister_app)
		return -EOPNOTSUPP;

	inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
	if (!inc)
		return -ENOMEM;
	INIT_LIST_HEAD(&inc->p_list);
	INIT_LIST_HEAD(&inc->incs_list);
	inc->app = app;
	inc->port = htons(port);
	atomic_set(&inc->usecnt, 0);

	if (app->timeouts) {
		inc->timeout_table =
			ip_vs_create_timeout_table(app->timeouts,
						   app->timeouts_size);
		if (!inc->timeout_table) {
			ret = -ENOMEM;
			goto out;
		}
	}

	ret = pp->register_app(ipvs, inc);
	if (ret)
		goto out;

	list_add(&inc->a_list, &app->incs_list);
	IP_VS_DBG(9, "%s App %s:%u registered\n",
		  pp->name, inc->name, ntohs(inc->port));

	return 0;

  out:
	ip_vs_app_inc_destroy(inc);
	return ret;
}


/*
 *	Release app incarnation
 *	Called with __ip_vs_app_mutex held. Unlinks the inc from the
 *	protocol table and from app->incs_list, then defers the free via
 *	call_rcu so concurrent softirq readers remain safe.
 */
static void
ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
	struct ip_vs_protocol *pp;

	if (!(pp = ip_vs_proto_get(inc->protocol)))
		return;

	if (pp->unregister_app)
		pp->unregister_app(ipvs, inc);

	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
		  pp->name, inc->name, ntohs(inc->port));

	list_del(&inc->a_list);

	call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
}


/*
 *	Get reference to app inc (only called from softirq)
 *	Takes a module ref on the owning app; usecnt is only bumped when
 *	the module ref succeeded, so get/put stay balanced.
 */
int ip_vs_app_inc_get(struct ip_vs_app *inc)
{
	int result;

	result = ip_vs_app_get(inc->app);
	if (result)
		atomic_inc(&inc->usecnt);
	return result;
}


/*
 *	Put the app inc (only called from timer or net softirq)
 */
void ip_vs_app_inc_put(struct ip_vs_app *inc)
{
	atomic_dec(&inc->usecnt);
	ip_vs_app_put(inc->app);
}


/*
 *	Register an application incarnation in protocol applications
 *	(i.e. bind @app to @proto/@port). Returns 0 or a -errno.
 */
int
register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
		       __u16 port)
{
	int result;

	mutex_lock(&__ip_vs_app_mutex);

	result = ip_vs_app_inc_new(ipvs, app, proto, port);

	mutex_unlock(&__ip_vs_app_mutex);

	return result;
}


/* Register application for netns.
 * Duplicates @app into a per-netns copy linked on ipvs->app_list and
 * pins the ip_vs module while the app is registered.
 * Returns the new copy, or ERR_PTR(-errno) (-ENOENT if ip_vs is
 * unloading, -EEXIST for a duplicate name, -ENOMEM).
 */
struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
{
	struct ip_vs_app *a;
	int err = 0;

	mutex_lock(&__ip_vs_app_mutex);

	/* increase the module use count */
	if (!ip_vs_use_count_inc()) {
		err = -ENOENT;
		goto out_unlock;
	}

	list_for_each_entry(a, &ipvs->app_list, a_list) {
		if (!strcmp(app->name, a->name)) {
			err = -EEXIST;
			/* decrease the module use count */
			ip_vs_use_count_dec();
			goto out_unlock;
		}
	}
	a = kmemdup(app, sizeof(*app), GFP_KERNEL);
	if (!a) {
		err = -ENOMEM;
		/* decrease the module use count */
		ip_vs_use_count_dec();
		goto out_unlock;
	}
	INIT_LIST_HEAD(&a->incs_list);
	list_add(&a->a_list, &ipvs->app_list);

out_unlock:
	mutex_unlock(&__ip_vs_app_mutex);

	return err ? ERR_PTR(err) : a;
}


/*
 *	ip_vs_app unregistration routine
 *	We are sure there are no app incarnations attached to services
 *	Caller should use synchronize_rcu() or rcu_barrier()
 *	@app == NULL means "unregister all apps in this netns" (used by
 *	ip_vs_app_net_cleanup below); otherwise apps are matched by name.
 */
void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
{
	struct ip_vs_app *a, *anxt, *inc, *nxt;

	mutex_lock(&__ip_vs_app_mutex);

	list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
		if (app && strcmp(app->name, a->name))
			continue;
		list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
			ip_vs_app_inc_release(ipvs, inc);
		}

		list_del(&a->a_list);
		kfree(a);

		/* decrease the module use count */
		ip_vs_use_count_dec();
	}

	mutex_unlock(&__ip_vs_app_mutex);
}


/*
 *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
 *	Delegates to the protocol's app_conn_bind hook.
 */
int ip_vs_bind_app(struct ip_vs_conn *cp,
		   struct ip_vs_protocol *pp)
{
	return pp->app_conn_bind(cp);
}


/*
 *	Unbind cp from application incarnation (called by cp destructor)
 *	Runs the inc's unbind/done hooks (if any), drops the inc reference
 *	and clears cp->app.
 */
void ip_vs_unbind_app(struct ip_vs_conn *cp)
{
	struct ip_vs_app *inc = cp->app;

	if (!inc)
		return;

	if (inc->unbind_conn)
		inc->unbind_conn(inc, cp);
	if (inc->done_conn)
		inc->done_conn(inc, cp);
	ip_vs_app_inc_put(inc);
	cp->app = NULL;
}


/*
 *	Fixes th->seq based on ip_vs_seq info.
 */
static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
{
	__u32 seq = ntohl(th->seq);

	/*
	 *	Adjust seq with delta-offset for all packets after
	 *	the most recent resized pkt seq and with previous_delta offset
	 *	for all packets before most recent resized pkt seq.
	 */
	if (vseq->delta || vseq->previous_delta) {
		if(after(seq, vseq->init_seq)) {
			th->seq = htonl(seq + vseq->delta);
			IP_VS_DBG(9, "%s(): added delta (%d) to seq\n",
				  __func__, vseq->delta);
		} else {
			th->seq = htonl(seq + vseq->previous_delta);
			IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n",
				  __func__, vseq->previous_delta);
		}
	}
}


/*
 *	Fixes th->ack_seq based on ip_vs_seq info.
 */
static inline void
vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
{
	__u32 ack_seq = ntohl(th->ack_seq);

	/*
	 * Adjust ack_seq with delta-offset for
	 * the packets AFTER most recent resized pkt has caused a shift
	 * for packets before most recent resized pkt, use previous_delta
	 */
	if (vseq->delta || vseq->previous_delta) {
		/* since ack_seq is the number of octet that is expected
		   to receive next, so compare it with init_seq+delta */
		if(after(ack_seq, vseq->init_seq+vseq->delta)) {
			th->ack_seq = htonl(ack_seq - vseq->delta);
			IP_VS_DBG(9, "%s(): subtracted delta "
				  "(%d) from ack_seq\n", __func__, vseq->delta);

		} else {
			th->ack_seq = htonl(ack_seq - vseq->previous_delta);
			IP_VS_DBG(9, "%s(): subtracted "
				  "previous_delta (%d) from ack_seq\n",
				  __func__, vseq->previous_delta);
		}
	}
}


/*
 *	Updates ip_vs_seq if pkt has been resized
 *	Assumes already checked proto==IPPROTO_TCP and diff!=0.
 *	Only the first update (flag not yet set) or a strictly newer seq
 *	rolls delta forward into previous_delta.
 */
static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
				 unsigned int flag, __u32 seq, int diff)
{
	/* spinlock is to keep updating cp->flags atomic */
	spin_lock_bh(&cp->lock);
	if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
		vseq->previous_delta = vseq->delta;
		vseq->delta += diff;
		vseq->init_seq = seq;
		cp->flags |= flag;
	}
	spin_unlock_bh(&cp->lock);
}

/* Apply seq/ack fixups and the app's pkt_out hook to an outgoing TCP
 * packet. Returns 1 on success, 0 if the skb could not be made writable
 * or the hook failed (caller treats 0 as "drop").
 */
static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
				  struct ip_vs_app *app,
				  struct ip_vs_iphdr *ipvsh)
{
	int diff;
	const unsigned int tcp_offset = ip_hdrlen(skb);
	struct tcphdr *th;
	__u32 seq;

	if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
		return 0;

	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);

	/*
	 *	Remember seq number in case this pkt gets resized
	 */
	seq = ntohl(th->seq);

	/*
	 *	Fix seq stuff if flagged as so.
	 */
	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
		vs_fix_seq(&cp->out_seq, th);
	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
		vs_fix_ack_seq(&cp->in_seq, th);

	/*
	 *	Call private output hook function
	 */
	if (app->pkt_out == NULL)
		return 1;

	if (!app->pkt_out(app, cp, skb, &diff, ipvsh))
		return 0;

	/*
	 *	Update ip_vs seq stuff if len has changed.
	 */
	if (diff != 0)
		vs_seq_update(cp, &cp->out_seq,
			      IP_VS_CONN_F_OUT_SEQ, seq, diff);

	return 1;
}

/*
 *	Output pkt hook. Will call bound ip_vs_app specific function
 *	called by ipvs packet handler, assumes previously checked cp!=NULL
 *	returns false if it can't handle packet (oom)
 */
int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
		      struct ip_vs_iphdr *ipvsh)
{
	struct ip_vs_app *app;

	/*
	 *	check if application module is bound to
	 *	this ip_vs_conn.
	 */
	if ((app = cp->app) == NULL)
		return 1;

	/* TCP is complicated */
	if (cp->protocol == IPPROTO_TCP)
		return app_tcp_pkt_out(cp, skb, app, ipvsh);

	/*
	 *	Call private output hook function
	 */
	if (app->pkt_out == NULL)
		return 1;

	return app->pkt_out(app, cp, skb, NULL, ipvsh);
}


/* Mirror of app_tcp_pkt_out for the incoming direction: in_seq is fixed
 * on th->seq, out_seq on th->ack_seq, and pkt_in is the hook.
 */
static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
				 struct ip_vs_app *app,
				 struct ip_vs_iphdr *ipvsh)
{
	int diff;
	const unsigned int tcp_offset = ip_hdrlen(skb);
	struct tcphdr *th;
	__u32 seq;

	if (skb_ensure_writable(skb, tcp_offset + sizeof(*th)))
		return 0;

	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);

	/*
	 *	Remember seq number in case this pkt gets resized
	 */
	seq = ntohl(th->seq);

	/*
	 *	Fix seq stuff if flagged as so.
	 */
	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
		vs_fix_seq(&cp->in_seq, th);
	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
		vs_fix_ack_seq(&cp->out_seq, th);

	/*
	 *	Call private input hook function
	 */
	if (app->pkt_in == NULL)
		return 1;

	if (!app->pkt_in(app, cp, skb, &diff, ipvsh))
		return 0;

	/*
	 *	Update ip_vs seq stuff if len has changed.
	 */
	if (diff != 0)
		vs_seq_update(cp, &cp->in_seq,
			      IP_VS_CONN_F_IN_SEQ, seq, diff);

	return 1;
}

/*
 *	Input pkt hook. Will call bound ip_vs_app specific function
 *	called by ipvs packet handler, assumes previously checked cp!=NULL.
 *	returns false if can't handle packet (oom).
 */
int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
		     struct ip_vs_iphdr *ipvsh)
{
	struct ip_vs_app *app;

	/*
	 *	check if application module is bound to
	 *	this ip_vs_conn.
	 */
	if ((app = cp->app) == NULL)
		return 1;

	/* TCP is complicated */
	if (cp->protocol == IPPROTO_TCP)
		return app_tcp_pkt_in(cp, skb, app, ipvsh);

	/*
	 *	Call private input hook function
	 */
	if (app->pkt_in == NULL)
		return 1;

	return app->pkt_in(app, cp, skb, NULL, ipvsh);
}


#ifdef CONFIG_PROC_FS
/*
 *	/proc/net/ip_vs_app entry function
 */

/* Return the @pos'th app incarnation across all registered apps, or NULL.
 * Caller holds __ip_vs_app_mutex (taken in ip_vs_app_seq_start).
 */
static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
{
	struct ip_vs_app *app, *inc;

	list_for_each_entry(app, &ipvs->app_list, a_list) {
		list_for_each_entry(inc, &app->incs_list, a_list) {
			if (pos-- == 0)
				return inc;
		}
	}
	return NULL;

}

static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct netns_ipvs *ipvs = net_ipvs(net);

	/* held until ip_vs_app_seq_stop */
	mutex_lock(&__ip_vs_app_mutex);

	return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
}

/* Advance to the next inc: first within the current app's incs_list,
 * then the first inc of each following app.
 */
static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ip_vs_app *inc, *app;
	struct list_head *e;
	struct net *net = seq_file_net(seq);
	struct netns_ipvs *ipvs = net_ipvs(net);

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_app_idx(ipvs, 0);

	inc = v;
	app = inc->app;

	if ((e = inc->a_list.next) != &app->incs_list)
		return list_entry(e, struct ip_vs_app, a_list);

	/* go on to next application */
	for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
		app = list_entry(e, struct ip_vs_app, a_list);
		list_for_each_entry(inc, &app->incs_list, a_list) {
			return inc;
		}
	}
	return NULL;
}

static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
{
	mutex_unlock(&__ip_vs_app_mutex);
}

static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "prot port usecnt name\n");
	else {
		const struct ip_vs_app *inc = v;

		seq_printf(seq, "%-3s %-7u %-6d %-17s\n",
			   ip_vs_proto_name(inc->protocol),
			   ntohs(inc->port),
			   atomic_read(&inc->usecnt),
			   inc->name);
	}
	return 0;
}

static const struct seq_operations ip_vs_app_seq_ops = {
	.start = ip_vs_app_seq_start,
	.next  = ip_vs_app_seq_next,
	.stop  = ip_vs_app_seq_stop,
	.show  = ip_vs_app_seq_show,
};
#endif

/* Per-netns init: empty app list plus the /proc/net/ip_vs_app entry. */
int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
{
	INIT_LIST_HEAD(&ipvs->app_list);
#ifdef CONFIG_PROC_FS
	if (!proc_create_net("ip_vs_app", 0, ipvs->net->proc_net,
			     &ip_vs_app_seq_ops,
			     sizeof(struct seq_net_private)))
		return -ENOMEM;
#endif
	return 0;
}

/* Per-netns teardown: unregister every remaining app, then drop /proc. */
void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
{
	unregister_ip_vs_app(ipvs, NULL /* all */);
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
#endif
}
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.